// Copyright (c) 2015-2021, NVIDIA CORPORATION.
// SPDX-License-Identifier: Apache-2.0

package inode

import (
	"container/list"
	"encoding/json"
	"fmt"
	"runtime/debug"
	"strings"
	"sync"
	"time"

	"github.com/ansel1/merry"
	"github.com/swiftstack/cstruct"
	"github.com/swiftstack/sortedmap"

	"github.com/swiftstack/ProxyFS/blunder"
	"github.com/swiftstack/ProxyFS/dlm"
	"github.com/swiftstack/ProxyFS/evtlog"
	"github.com/swiftstack/ProxyFS/halter"
	"github.com/swiftstack/ProxyFS/headhunter"
	"github.com/swiftstack/ProxyFS/logger"
	"github.com/swiftstack/ProxyFS/stats"
	"github.com/swiftstack/ProxyFS/swiftclient"
	"github.com/swiftstack/ProxyFS/trackedlock"
	"github.com/swiftstack/ProxyFS/utils"
)

// Shorthand for inode internal API debug log id; global to the package
var int_inode_debug = logger.DbgInodeInternal

const (
	// Initial byte count used when optimistically reading an inode record
	// in a single fetch.
	optimisticInodeFetchBytes = 2048
)

// CorruptionDetected is the on-disk flag preceding every inode record that
// records whether the inode has been marked corrupt.
type CorruptionDetected bool

// Version identifies the serialization format of an on-disk inode record.
type Version uint64

const (
	V1 Version = iota + 1 // use type/struct onDiskInodeV1Struct

	// onDiskInodeV1PayloadObjectOffset is the offset within the payload
	// object at which a V1 inode's B+Tree root is expected to reside.
	onDiskInodeV1PayloadObjectOffset uint64 = 0
)

// onDiskInodeV1Struct is the JSON-marshalled body of a V1 inode record.
// Preceded "on disk" by CorruptionDetected then Version both in
// cstruct.LittleEndian form.
type onDiskInodeV1Struct struct {
	InodeNumber                           // embedded; the inode's own number
	InodeType                             // embedded; DirType, FileType, or SymlinkType
	LinkCount           uint64            // number of directory entries referencing this inode
	Size                uint64            // FileInode: logical file size in bytes
	CreationTime        time.Time
	ModificationTime    time.Time
	AccessTime          time.Time
	AttrChangeTime      time.Time
	NumWrites           uint64
	Mode                InodeMode
	UserID              InodeUserID
	GroupID             InodeGroupID
	StreamMap           map[string][]byte // named ("extended attribute") streams
	PayloadObjectNumber uint64            // DirInode: B+Tree Root with Key == dir_entry_name, Value = InodeNumber
	PayloadObjectLength uint64            // FileInode: B+Tree Root with Key == fileOffset, Value = fileExtent
	SymlinkTarget       string            // SymlinkInode: target path of symbolic link
	LogSegmentMap       map[uint64]uint64 // FileInode: Key == LogSegment#, Value = file user data byte count
}

// inFlightLogSegmentStruct tracks a single LogSegment whose chunked PUT to
// Swift is still in flight.
type inFlightLogSegmentStruct struct { // Used as (by reference) Value for inMemoryInodeStruct.inFlightLogSegmentMap
	logSegmentNumber          uint64 // Used as (by value) Key for inMemoryInodeStruct.inFlightLogSegmentMap
	openLogSegmentLRUNext     *inFlightLogSegmentStruct
	openLogSegmentLRUPrev     *inFlightLogSegmentStruct
	fileInode                 *inMemoryInodeStruct
	accountName               string
	containerName             string
	objectName                string
	openLogSegmentListElement list.Element
	swiftclient.ChunkedPutContext
}

// inMemoryInodeStruct is the in-memory representation of an inode: the
// embedded on-disk fields plus cache/LRU linkage and in-flight write state.
type inMemoryInodeStruct struct {
	trackedlock.Mutex // Used to synchronize with background fileInodeFlusherDaemon
	sync.WaitGroup    // FileInode Flush requests wait on this

	inodeCacheLRUNext *inMemoryInodeStruct
	inodeCacheLRUPrev *inMemoryInodeStruct
	dirty             bool // true when the in-memory copy differs from the on-disk record
	volume            *volumeStruct
	snapShotID        uint64
	payload           interface{} // DirInode:  B+Tree with Key == dir_entry_name, Value = InodeNumber
	//                               FileInode: B+Tree with Key == fileOffset, Value = *fileExtent

	openLogSegment           *inFlightLogSegmentStruct            // FileInode only... also in inFlightLogSegmentMap
	inFlightLogSegmentMap    map[uint64]*inFlightLogSegmentStruct // FileInode: key == logSegmentNumber
	inFlightLogSegmentErrors map[uint64]error                     // FileInode: key == logSegmentNumber; value == err (if non nil)
	onDiskInodeV1Struct      // Real on-disk inode information embedded here
}
// DumpKey renders an inodeCache key (an InodeNumber) for sortedmap dump
// output as a 0x-prefixed, zero-padded hex string.
func (vS *volumeStruct) DumpKey(key sortedmap.Key) (keyAsString string, err error) {
	keyAsInodeNumber, ok := key.(InodeNumber)
	if !ok {
		err = fmt.Errorf("inode.volumeStruct.DumpKey() could not parse key as a InodeNumber")
		return
	}

	keyAsString = fmt.Sprintf("0x%016X", keyAsInodeNumber)

	err = nil
	return
}

// DumpValue renders an inodeCache value (a *inMemoryInodeStruct) for
// sortedmap dump output as the pointer's address.
func (vS *volumeStruct) DumpValue(value sortedmap.Value) (valueAsString string, err error) {
	valueAsInMemoryInodeStructPtr, ok := value.(*inMemoryInodeStruct)
	if !ok {
		err = fmt.Errorf("inode.volumeStruct.DumpValue() could not parse value as a *inMemoryInodeStruct")
		return
	}

	valueAsString = fmt.Sprintf("%016p", valueAsInMemoryInodeStructPtr)

	err = nil
	return
}

// compareInodeNumber is the sortedmap comparison function for InodeNumber
// keys, returning -1, 0, or +1 per the usual ordering contract.
func compareInodeNumber(key1 sortedmap.Key, key2 sortedmap.Key) (result int, err error) {
	key1InodeNumber, ok := key1.(InodeNumber)
	if !ok {
		err = fmt.Errorf("compareInodeNumber(non-InodeNumber,) not supported")
		return
	}
	key2InodeNumber, ok := key2.(InodeNumber)
	if !ok {
		err = fmt.Errorf("compareInodeNumber(InodeNumber, non-InodeNumber) not supported")
		return
	}

	if key1InodeNumber < key2InodeNumber {
		result = -1
	} else if key1InodeNumber == key2InodeNumber {
		result = 0
	} else { // key1InodeNumber > key2InodeNumber
		result = 1
	}

	err = nil

	return
}

// setRWMode transitions the package-wide read/write mode, bumping the
// corresponding recon-check stat only on an actual mode change. An
// unrecognized mode is rejected without altering globals.rwMode.
// NOTE(review): globals.rwMode is read and written without an explicit lock
// here — presumably serialized by the caller; confirm before relying on it.
func setRWMode(rwMode RWModeType) (err error) {
	if rwMode != globals.rwMode {
		switch rwMode {
		case RWModeNormal:
			stats.IncrementOperations(&stats.ReconCheckTriggeredNormalMode)
		case RWModeNoWrite:
			stats.IncrementOperations(&stats.ReconCheckTriggeredNoWriteMode)
		case RWModeReadOnly:
			stats.IncrementOperations(&stats.ReconCheckTriggeredReadOnlyMode)
		default:
			err = fmt.Errorf("SetRWMode(rwMode==%d) not allowed... must be one of RWModeNormal(%d), RWModeNoWrite(%d), or RWModeReadOnly(%d)", rwMode, RWModeNormal, RWModeNoWrite, RWModeReadOnly)
			return
		}

		globals.rwMode = rwMode
	}

	err = nil
	return
}

// enforceRWMode converts the current read/write mode into an error:
// RWModeReadOnly always fails; RWModeNoWrite fails only when the caller asks
// for no-write enforcement (enforceNoWriteMode == true); otherwise nil.
func enforceRWMode(enforceNoWriteMode bool) (err error) {
	var (
		rwModeCopy RWModeType
	)

	rwModeCopy = globals.rwMode

	if rwModeCopy == RWModeReadOnly {
		err = blunder.NewError(globals.readOnlyThresholdErrno, globals.readOnlyThresholdErrnoString)
	} else if enforceNoWriteMode && (rwModeCopy == RWModeNoWrite) {
		err = blunder.NewError(globals.noWriteThresholdErrno, globals.noWriteThresholdErrnoString)
	} else {
		err = nil
	}

	return
}

// FetchOnDiskInode reads the raw inode record for inodeNumber from
// headhunter and splits off the leading cstruct-encoded CorruptionDetected
// and Version fields, returning the remaining (serialized) inode body.
// When corruptionDetected is true, version and onDiskInode are not valid.
func (vS *volumeStruct) FetchOnDiskInode(inodeNumber InodeNumber) (corruptionDetected CorruptionDetected, version Version, onDiskInode []byte, err error) {
	var (
		bytesConsumedByCorruptionDetected uint64
		bytesConsumedByVersion            uint64
		inodeRec                          []byte
		ok                                bool
	)

	corruptionDetected = CorruptionDetected(false)
	version = Version(0)
	onDiskInode = make([]byte, 0)

	inodeRec, ok, err = vS.headhunterVolumeHandle.GetInodeRec(uint64(inodeNumber))
	if nil != err {
		err = fmt.Errorf("headhunter.GetInodeRec() failed: %v", err)
		return
	}
	if !ok {
		err = fmt.Errorf("headhunter.GetInodeRec() returned !ok")
		return
	}

	bytesConsumedByCorruptionDetected, err = cstruct.Unpack(inodeRec, &corruptionDetected, cstruct.LittleEndian)
	if nil != err {
		err = fmt.Errorf("cstruct.Unpack(,&corruptionDetected,) failed: %v", err)
		return
	}
	if corruptionDetected {
		return
	}

	bytesConsumedByVersion, err = cstruct.Unpack(inodeRec[bytesConsumedByCorruptionDetected:], &version, cstruct.LittleEndian)
	if nil != err {
		err = fmt.Errorf("cstruct.Unpack(,&version,) failed: %v", err)
		return
	}

	onDiskInode = inodeRec[bytesConsumedByCorruptionDetected+bytesConsumedByVersion:]

	return
}
// fetchOnDiskInode reads, validates, and deserializes the inode record for
// inodeNumber into a fresh inMemoryInodeStruct. The record layout is:
// cstruct-encoded CorruptionDetected, then Version (must be V1), then the
// JSON-marshalled onDiskInodeV1Struct body. For Dir and File inodes the
// payload B+Tree is attached (a new empty tree when PayloadObjectNumber is
// 0, otherwise one rooted at the recorded object). ok == false (with nil
// err) means headhunter has no record for this inode.
func (vS *volumeStruct) fetchOnDiskInode(inodeNumber InodeNumber) (inMemoryInode *inMemoryInodeStruct, ok bool, err error) {
	var (
		bytesConsumedByCorruptionDetected uint64
		bytesConsumedByVersion            uint64
		corruptionDetected                CorruptionDetected
		inodeRec                          []byte
		onDiskInodeV1                     *onDiskInodeV1Struct
		snapShotID                        uint64
		snapShotIDType                    headhunter.SnapShotIDType
		version                           Version
	)

	// The virtual /<SnapShotDir> inode has no on-disk record by definition.
	snapShotIDType, snapShotID, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType {
		logger.Fatalf("fetchOnDiskInode for headhunter.SnapShotIDTypeDotSnapShot not allowed")
	}

	inodeRec, ok, err = vS.headhunterVolumeHandle.GetInodeRec(uint64(inodeNumber))
	if nil != err {
		stackStr := string(debug.Stack())
		err = fmt.Errorf("%s: unable to get inodeRec for inode %d: %v stack: %s",
			utils.GetFnName(), inodeNumber, err, stackStr)
		err = blunder.AddError(err, blunder.NotFoundError)
		return
	}
	if !ok {
		return
	}

	bytesConsumedByCorruptionDetected, err = cstruct.Unpack(inodeRec, &corruptionDetected, cstruct.LittleEndian)
	if nil != err {
		err = fmt.Errorf("%s: unable to parse inodeRec.CorruptionDetected for inode %d: %v", utils.GetFnName(), inodeNumber, err)
		err = blunder.AddError(err, blunder.CorruptInodeError)
		return
	}
	if corruptionDetected {
		err = fmt.Errorf("%s: inode %d has been marked corrupted", utils.GetFnName(), inodeNumber)
		err = blunder.AddError(err, blunder.CorruptInodeError)
		return
	}

	bytesConsumedByVersion, err = cstruct.Unpack(inodeRec[bytesConsumedByCorruptionDetected:], &version, cstruct.LittleEndian)
	if nil != err {
		err = fmt.Errorf("%s: unable to get inodeRec.Version for inode %d: %v", utils.GetFnName(), inodeNumber, err)
		err = blunder.AddError(err, blunder.CorruptInodeError)
		return
	}
	if V1 != version {
		err = fmt.Errorf("%s: inodeRec.Version for inode %d (%v) not supported", utils.GetFnName(), inodeNumber, version)
		err = blunder.AddError(err, blunder.CorruptInodeError)
		return
	}

	// Pre-allocate StreamMap so JSON decoding of streams has a target map.
	onDiskInodeV1 = &onDiskInodeV1Struct{StreamMap: make(map[string][]byte)}

	err = json.Unmarshal(inodeRec[bytesConsumedByCorruptionDetected+bytesConsumedByVersion:], onDiskInodeV1)
	if nil != err {
		err = fmt.Errorf("%s: inodeRec.<body> for inode %d json.Unmarshal() failed: %v", utils.GetFnName(), inodeNumber, err)
		err = blunder.AddError(err, blunder.CorruptInodeError)
		return
	}

	inMemoryInode = &inMemoryInodeStruct{
		inodeCacheLRUNext:        nil,
		inodeCacheLRUPrev:        nil,
		dirty:                    false,
		volume:                   vS,
		snapShotID:               snapShotID,
		openLogSegment:           nil,
		inFlightLogSegmentMap:    make(map[uint64]*inFlightLogSegmentStruct),
		inFlightLogSegmentErrors: make(map[uint64]error),
		onDiskInodeV1Struct:      *onDiskInodeV1,
	}

	// Record the (possibly SnapShot-qualified) number actually requested,
	// overwriting whatever number the serialized body carried.
	inMemoryInode.onDiskInodeV1Struct.InodeNumber = inodeNumber

	switch inMemoryInode.InodeType {
	case DirType:
		if 0 == inMemoryInode.PayloadObjectNumber {
			// Never-flushed directory: start with an empty B+Tree.
			inMemoryInode.payload =
				sortedmap.NewBPlusTree(
					vS.maxEntriesPerDirNode,
					sortedmap.CompareString,
					&dirInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}},
					globals.dirEntryCache)
		} else {
			inMemoryInode.payload, err =
				sortedmap.OldBPlusTree(
					inMemoryInode.PayloadObjectNumber,
					onDiskInodeV1PayloadObjectOffset,
					inMemoryInode.PayloadObjectLength,
					sortedmap.CompareString,
					&dirInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}},
					globals.dirEntryCache)
			if nil != err {
				err = fmt.Errorf("%s: sortedmap.OldBPlusTree(inodeRec.<body>.PayloadObjectNumber) for DirType inode %d failed: %v", utils.GetFnName(), inodeNumber, err)
				err = blunder.AddError(err, blunder.CorruptInodeError)
				return
			}
		}
	case FileType:
		if 0 == inMemoryInode.PayloadObjectNumber {
			// Never-flushed file: start with an empty extent map.
			inMemoryInode.payload =
				sortedmap.NewBPlusTree(
					vS.maxExtentsPerFileNode,
					sortedmap.CompareUint64,
					&fileInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}},
					globals.fileExtentMapCache)
		} else {
			inMemoryInode.payload, err =
				sortedmap.OldBPlusTree(
					inMemoryInode.PayloadObjectNumber,
					onDiskInodeV1PayloadObjectOffset,
					inMemoryInode.PayloadObjectLength,
					sortedmap.CompareUint64,
					&fileInodeCallbacks{treeNodeLoadable{inode: inMemoryInode}},
					globals.fileExtentMapCache)
			if nil != err {
				err = fmt.Errorf("%s: sortedmap.OldBPlusTree(inodeRec.<body>.PayloadObjectNumber) for FileType inode %d failed: %v", utils.GetFnName(), inodeNumber, err)
				err = blunder.AddError(err, blunder.CorruptInodeError)
				return
			}
		}
	case SymlinkType:
		// Nothing special here
	default:
		err = fmt.Errorf("%s: inodeRec.InodeType for inode %d (%v) not supported", utils.GetFnName(), inodeNumber, inMemoryInode.InodeType)
		err = blunder.AddError(err, blunder.CorruptInodeError)
		return
	}

	err = nil
	return
}

// inodeCacheFetchWhileLocked looks up inodeNumber in the inodeCache and, on
// a hit, moves the inode to the MRU end of the cache LRU. Caller must hold
// the volumeStruct lock.
func (vS *volumeStruct) inodeCacheFetchWhileLocked(inodeNumber InodeNumber) (inode *inMemoryInodeStruct, ok bool, err error) {
	var (
		inodeAsValue sortedmap.Value
	)

	inodeAsValue, ok, err = vS.inodeCache.GetByKey(inodeNumber)
	if nil != err {
		return
	}

	if ok {
		inode, ok = inodeAsValue.(*inMemoryInodeStruct)
		if ok {
			vS.inodeCacheTouchWhileLocked(inode)
			err = nil
		} else {
			// Cache held a value of an unexpected type: report as a miss.
			ok = false
			err = fmt.Errorf("inodeCache[inodeNumber==0x%016X] contains a value not mappable to a *inMemoryInodeStruct", inodeNumber)
		}
	}

	return
}
// inodeCacheFetch is the locking wrapper around inodeCacheFetchWhileLocked.
func (vS *volumeStruct) inodeCacheFetch(inodeNumber InodeNumber) (inode *inMemoryInodeStruct, ok bool, err error) {
	vS.Lock()
	inode, ok, err = vS.inodeCacheFetchWhileLocked(inodeNumber)
	vS.Unlock()
	return
}

// inodeCacheInsertWhileLocked adds inode to the inodeCache map and appends
// it to the MRU end of the cache LRU. ok == false means the inodeNumber was
// already present (nothing is modified). Caller must hold the volumeStruct
// lock.
func (vS *volumeStruct) inodeCacheInsertWhileLocked(inode *inMemoryInodeStruct) (ok bool, err error) {
	ok, err = vS.inodeCache.Put(inode.InodeNumber, inode)
	if (nil != err) || !ok {
		return
	}

	// Place inode at the MRU end of inodeCacheLRU

	if 0 == vS.inodeCacheLRUItems {
		vS.inodeCacheLRUHead = inode
		vS.inodeCacheLRUTail = inode
		vS.inodeCacheLRUItems = 1
	} else {
		inode.inodeCacheLRUPrev = vS.inodeCacheLRUTail
		inode.inodeCacheLRUPrev.inodeCacheLRUNext = inode

		vS.inodeCacheLRUTail = inode
		vS.inodeCacheLRUItems++
	}

	return
}

// inodeCacheInsert is the locking wrapper around inodeCacheInsertWhileLocked.
func (vS *volumeStruct) inodeCacheInsert(inode *inMemoryInodeStruct) (ok bool, err error) {
	vS.Lock()
	ok, err = vS.inodeCacheInsertWhileLocked(inode)
	vS.Unlock()
	return
}

// inodeCacheTouchWhileLocked moves inode to the MRU (tail) end of the cache
// LRU list. Caller must hold the volumeStruct lock and inode must already be
// on the list.
func (vS *volumeStruct) inodeCacheTouchWhileLocked(inode *inMemoryInodeStruct) {
	// Move inode to the MRU end of inodeCacheLRU

	if inode != vS.inodeCacheLRUTail {
		if inode == vS.inodeCacheLRUHead {
			// inode is current LRU head (and not also tail): advance head.
			vS.inodeCacheLRUHead = inode.inodeCacheLRUNext
			vS.inodeCacheLRUHead.inodeCacheLRUPrev = nil

			inode.inodeCacheLRUPrev = vS.inodeCacheLRUTail
			inode.inodeCacheLRUNext = nil

			vS.inodeCacheLRUTail.inodeCacheLRUNext = inode
			vS.inodeCacheLRUTail = inode
		} else {
			// inode is an interior element: unlink then append at tail.
			inode.inodeCacheLRUPrev.inodeCacheLRUNext = inode.inodeCacheLRUNext
			inode.inodeCacheLRUNext.inodeCacheLRUPrev = inode.inodeCacheLRUPrev

			inode.inodeCacheLRUNext = nil
			inode.inodeCacheLRUPrev = vS.inodeCacheLRUTail

			vS.inodeCacheLRUTail.inodeCacheLRUNext = inode
			vS.inodeCacheLRUTail = inode
		}
	}
}

// inodeCacheTouch is the locking wrapper around inodeCacheTouchWhileLocked.
func (vS *volumeStruct) inodeCacheTouch(inode *inMemoryInodeStruct) {
	vS.Lock()
	vS.inodeCacheTouchWhileLocked(inode)
	vS.Unlock()
}

// The inode cache discard thread calls this routine when the ticker goes off.
//
// While the cache (in bytes, computed as items * globals.inodeSize) exceeds
// inodeCacheLRUMaxBytes, evict clean inodes from the LRU end. Inodes that
// are DLM-locked or dirty are skipped (moved to the MRU end) rather than
// evicted; at most 1.25x the minimum number of inodes needed are examined so
// the loop terminates even if everything is busy. Returns counts of
// discarded, dirty-skipped, and locked-skipped inodes plus the residual LRU
// item count.
func (vS *volumeStruct) inodeCacheDiscard() (discarded uint64, dirty uint64, locked uint64, lruItems uint64) {
	inodesToDrop := uint64(0)

	vS.Lock()

	if (vS.inodeCacheLRUItems * globals.inodeSize) > vS.inodeCacheLRUMaxBytes {
		// Check, at most, 1.25 * (minimum_number_to_drop)
		inodesToDrop = (vS.inodeCacheLRUItems * globals.inodeSize) - vS.inodeCacheLRUMaxBytes
		inodesToDrop = inodesToDrop / globals.inodeSize
		inodesToDrop += inodesToDrop / 4
		for (inodesToDrop > 0) && ((vS.inodeCacheLRUItems * globals.inodeSize) > vS.inodeCacheLRUMaxBytes) {
			inodesToDrop--

			ic := vS.inodeCacheLRUHead

			// Create a DLM lock object
			id := dlm.GenerateCallerID()
			inodeRWLock, _ := vS.InitInodeLock(ic.InodeNumber, id)
			err := inodeRWLock.TryWriteLock()

			// Inode is locked; skip it
			if err != nil {
				// Move inode to tail of LRU
				vS.inodeCacheTouchWhileLocked(ic)
				locked++
				continue
			}

			if ic.dirty {
				// The inode is busy - drop the DLM lock and move to tail
				inodeRWLock.Unlock()
				dirty++
				vS.inodeCacheTouchWhileLocked(ic)
				continue
			}

			var ok bool

			discarded++
			ok, err = vS.inodeCacheDropWhileLocked(ic)
			if err != nil || !ok {
				pStr := fmt.Errorf("The inodes was not found in the inode cache - ok: %v err: %v", ok, err)
				panic(pStr)
			}

			inodeRWLock.Unlock()

			// NOTE: vS.inodeCacheDropWhileLocked() removed the inode from the LRU list so
			//       the head is now different
		}
	}
	lruItems = vS.inodeCacheLRUItems
	vS.Unlock()
	//logger.Infof("discard: %v dirty: %v locked: %v LRUitems: %v", discarded, dirty, locked, lruItems)
	return
}
vS.inodeCache.DeleteByKey(inode.InodeNumber) 514 if (nil != err) || !ok { 515 return 516 } 517 518 if inode == vS.inodeCacheLRUHead { 519 if inode == vS.inodeCacheLRUTail { 520 vS.inodeCacheLRUHead = nil 521 vS.inodeCacheLRUTail = nil 522 vS.inodeCacheLRUItems = 0 523 } else { 524 vS.inodeCacheLRUHead = inode.inodeCacheLRUNext 525 vS.inodeCacheLRUHead.inodeCacheLRUPrev = nil 526 vS.inodeCacheLRUItems-- 527 528 inode.inodeCacheLRUNext = nil 529 } 530 } else { 531 if inode == vS.inodeCacheLRUTail { 532 vS.inodeCacheLRUTail = inode.inodeCacheLRUPrev 533 vS.inodeCacheLRUTail.inodeCacheLRUNext = nil 534 vS.inodeCacheLRUItems-- 535 536 inode.inodeCacheLRUPrev = nil 537 } else { 538 inode.inodeCacheLRUPrev.inodeCacheLRUNext = inode.inodeCacheLRUNext 539 inode.inodeCacheLRUNext.inodeCacheLRUPrev = inode.inodeCacheLRUPrev 540 vS.inodeCacheLRUItems-- 541 542 inode.inodeCacheLRUNext = nil 543 inode.inodeCacheLRUPrev = nil 544 } 545 } 546 547 return 548 } 549 550 func (vS *volumeStruct) inodeCacheDrop(inode *inMemoryInodeStruct) (ok bool, err error) { 551 vS.Lock() 552 ok, err = vS.inodeCacheDropWhileLocked(inode) 553 vS.Unlock() 554 return 555 } 556 557 func (vS *volumeStruct) fetchInode(inodeNumber InodeNumber) (inode *inMemoryInodeStruct, ok bool, err error) { 558 for { 559 inode, ok, err = vS.inodeCacheFetch(inodeNumber) 560 if nil != err { 561 return 562 } 563 564 if ok { 565 return 566 } 567 568 inode, ok, err = vS.fetchOnDiskInode(inodeNumber) 569 if nil != err { 570 return 571 } 572 if !ok { 573 err = fmt.Errorf("%s.fetchInode(0x%016X) not found", vS.volumeName, inodeNumber) 574 return 575 } 576 577 ok, err = vS.inodeCacheInsert(inode) 578 if nil != err { 579 return 580 } 581 582 if ok { 583 return 584 } 585 586 // If we reach here, somebody beat us to it... just restart the fetch... 
587 } 588 } 589 590 // Fetch inode with inode type checking 591 func (vS *volumeStruct) fetchInodeType(inodeNumber InodeNumber, expectedType InodeType) (inode *inMemoryInodeStruct, err error) { 592 inode, ok, err := vS.fetchInode(inodeNumber) 593 if nil != err { 594 return 595 } 596 if !ok { 597 err = fmt.Errorf("%s: expected inode %d volume '%s' to be type %v, but it was unallocated", 598 utils.GetFnName(), inode.InodeNumber, vS.volumeName, expectedType) 599 err = blunder.AddError(err, blunder.NotFoundError) 600 return 601 } 602 if inode.InodeType == expectedType { 603 // success 604 return 605 } 606 607 err = fmt.Errorf("%s: expected inode %d volume '%s' to be type %v, got %v", 608 utils.GetFnName(), inode.InodeNumber, vS.volumeName, expectedType, inode.InodeType) 609 610 var errVal blunder.FsError 611 switch expectedType { 612 case DirType: 613 errVal = blunder.NotDirError 614 case FileType: 615 errVal = blunder.NotFileError 616 case SymlinkType: 617 errVal = blunder.NotSymlinkError 618 default: 619 panic(fmt.Sprintf("unknown inode type=%v!", expectedType)) 620 } 621 err = blunder.AddError(err, errVal) 622 623 return 624 } 625 626 func (vS *volumeStruct) makeInMemoryInodeWithThisInodeNumber(inodeType InodeType, fileMode InodeMode, userID InodeUserID, groupID InodeGroupID, inodeNumber InodeNumber, volumeLocked bool) (inMemoryInode *inMemoryInodeStruct) { 627 var ( 628 birthTime time.Time 629 nonce uint64 630 snapShotID uint64 631 snapShotIDType headhunter.SnapShotIDType 632 ) 633 634 snapShotIDType, snapShotID, nonce = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 635 if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType { 636 logger.Fatalf("makeInMemoryInodeWithThisInodeNumber for headhunter.SnapShotIDTypeDotSnapShot not allowed") 637 } 638 639 birthTime = time.Now() 640 641 inMemoryInode = &inMemoryInodeStruct{ 642 inodeCacheLRUNext: nil, 643 inodeCacheLRUPrev: nil, 644 dirty: true, 645 volume: vS, 646 snapShotID: snapShotID, 647 
openLogSegment: nil, 648 inFlightLogSegmentMap: make(map[uint64]*inFlightLogSegmentStruct), 649 inFlightLogSegmentErrors: make(map[uint64]error), 650 onDiskInodeV1Struct: onDiskInodeV1Struct{ 651 InodeNumber: InodeNumber(nonce), 652 InodeType: inodeType, 653 CreationTime: birthTime, 654 ModificationTime: birthTime, 655 AccessTime: birthTime, 656 AttrChangeTime: birthTime, 657 NumWrites: 0, 658 Mode: fileMode, 659 UserID: userID, 660 GroupID: groupID, 661 StreamMap: make(map[string][]byte), 662 LogSegmentMap: make(map[uint64]uint64), 663 }, 664 } 665 666 return 667 } 668 669 func (vS *volumeStruct) makeInMemoryInode(inodeType InodeType, fileMode InodeMode, userID InodeUserID, groupID InodeGroupID) (inMemoryInode *inMemoryInodeStruct, err error) { 670 inodeNumberAsUint64 := vS.headhunterVolumeHandle.FetchNonce() 671 672 inMemoryInode = vS.makeInMemoryInodeWithThisInodeNumber(inodeType, fileMode, userID, groupID, InodeNumber(inodeNumberAsUint64), false) 673 674 return 675 } 676 677 func (vS *volumeStruct) PatchInode(inodeNumber InodeNumber, inodeType InodeType, linkCount uint64, mode InodeMode, userID InodeUserID, groupID InodeGroupID, parentInodeNumber InodeNumber, symlinkTarget string) (err error) { 678 var ( 679 callerID dlm.CallerID 680 inode *inMemoryInodeStruct 681 inodeNumberDecodedAsInodeNumber InodeNumber 682 inodeNumberDecodedAsUint64 uint64 683 inodeRWLock *dlm.RWLockStruct 684 modeAdornedWithInodeType InodeMode 685 ok bool 686 parentInodeNumberDecodedAsInodeNumber InodeNumber 687 parentInodeNumberDecodedAsUint64 uint64 688 payload sortedmap.BPlusTree 689 snapShotIDType headhunter.SnapShotIDType 690 ) 691 692 snapShotIDType, _, inodeNumberDecodedAsUint64 = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 693 if headhunter.SnapShotIDTypeLive != snapShotIDType { 694 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) must provide a non-SnapShot inodeNumber", inodeNumber) 695 return 696 } 697 inodeNumberDecodedAsInodeNumber = 
InodeNumber(inodeNumberDecodedAsUint64) 698 699 switch inodeType { 700 case DirType: 701 if 2 != linkCount { 702 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,linkCount==%v,,,,,) must set linkCount to 2", inodeNumber, linkCount) 703 return 704 } 705 if InodeNumber(0) == parentInodeNumber { 706 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0,) must provide a non-zero parentInodeNumber", inodeNumber) 707 return 708 } 709 if (RootDirInodeNumber == inodeNumber) && (RootDirInodeNumber != parentInodeNumber) { 710 err = fmt.Errorf("PatchInode(inodeNumber==RootDirInodeNumber,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) must provide RootDirInode's parent as also RootDirInodeNumber", parentInodeNumber) 711 return 712 } 713 snapShotIDType, _, parentInodeNumberDecodedAsUint64 = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 714 if headhunter.SnapShotIDTypeLive != snapShotIDType { 715 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) must provide a non-SnapShot parentInodeNumber", inodeNumber, parentInodeNumber) 716 return 717 } 718 parentInodeNumberDecodedAsInodeNumber = InodeNumber(parentInodeNumberDecodedAsUint64) 719 case FileType: 720 if 0 == linkCount { 721 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==FileType,linkCount==0,,,,,) must provide a non-zero linkCount", inodeNumber) 722 return 723 } 724 case SymlinkType: 725 if 0 == linkCount { 726 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==SymlinkType,linkCount==0,,,,,) must provide a non-zero linkCount", inodeNumber) 727 return 728 } 729 if "" == symlinkTarget { 730 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==SymlinkType,,,,,,symlinkTarget==\"\") must provide a non-empty symlinkTarget", inodeNumber) 731 return 732 } 733 default: 734 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==%v,,,,,,) must provide a inodeType of 
DirType(%v), FileType(%v), or SymlinkType(%v)", inodeNumber, inodeType, DirType, FileType, SymlinkType) 735 return 736 } 737 738 modeAdornedWithInodeType, err = determineMode(mode, inodeType) 739 if nil != err { 740 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==%v,,mode==0o%011o,,,,) failed: %v", inodeNumber, inodeType, mode, err) 741 return 742 } 743 744 vS.Lock() 745 746 callerID = dlm.GenerateCallerID() 747 inodeRWLock, _ = vS.InitInodeLock(inodeNumber, callerID) 748 err = inodeRWLock.TryWriteLock() 749 if nil != err { 750 vS.Unlock() 751 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) couldn't create a *dlm.RWLockStruct: %v", inodeNumber, err) 752 return 753 } 754 755 inode, ok, err = vS.inodeCacheFetchWhileLocked(inodeNumber) 756 if nil != err { 757 _ = inodeRWLock.Unlock() 758 vS.Unlock() 759 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) couldn't search inodeCache for pre-existing inode: %v", inodeNumber, err) 760 return 761 } 762 if ok { 763 if inode.dirty { 764 _ = inodeRWLock.Unlock() 765 vS.Unlock() 766 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) of dirty Inode is not allowed", inodeNumber) 767 return 768 } 769 ok, err = vS.inodeCacheDropWhileLocked(inode) 770 if nil != err { 771 _ = inodeRWLock.Unlock() 772 vS.Unlock() 773 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) drop of pre-existing inode from inodeCache failed: %v", inodeNumber, err) 774 return 775 } 776 if !ok { 777 _ = inodeRWLock.Unlock() 778 vS.Unlock() 779 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) drop of pre-existing inode from inodeCache returned !ok", inodeNumber) 780 return 781 } 782 } 783 784 inode = vS.makeInMemoryInodeWithThisInodeNumber(inodeType, modeAdornedWithInodeType, userID, groupID, inodeNumberDecodedAsInodeNumber, true) 785 786 inode.dirty = true 787 788 inode.onDiskInodeV1Struct.LinkCount = linkCount 789 790 switch inodeType { 791 case DirType: 792 payload = sortedmap.NewBPlusTree( 793 
vS.maxEntriesPerDirNode, 794 sortedmap.CompareString, 795 &dirInodeCallbacks{treeNodeLoadable{inode: inode}}, 796 globals.dirEntryCache) 797 798 ok, err = payload.Put(".", inodeNumberDecodedAsInodeNumber) 799 if nil != err { 800 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,,) failed to insert \".\" dirEntry: %v", inodeNumber, err) 801 panic(err) 802 } 803 if !ok { 804 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,,) insert \".\" dirEntry got a !ok", inodeNumber) 805 panic(err) 806 } 807 808 ok, err = payload.Put("..", parentInodeNumberDecodedAsInodeNumber) 809 if nil != err { 810 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) failed to insert \"..\" dirEntry: %v", inodeNumber, parentInodeNumber, err) 811 panic(err) 812 } 813 if !ok { 814 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,parentInodeNumber==0x%016X,) insert \"..\" dirEntry got a !ok", inodeNumber, parentInodeNumber) 815 panic(err) 816 } 817 818 inode.payload = payload 819 inode.onDiskInodeV1Struct.SymlinkTarget = "" 820 case FileType: 821 payload = sortedmap.NewBPlusTree( 822 vS.maxExtentsPerFileNode, 823 sortedmap.CompareUint64, 824 &fileInodeCallbacks{treeNodeLoadable{inode: inode}}, 825 globals.fileExtentMapCache) 826 827 inode.payload = payload 828 inode.onDiskInodeV1Struct.SymlinkTarget = "" 829 case SymlinkType: 830 inode.payload = nil 831 inode.onDiskInodeV1Struct.SymlinkTarget = symlinkTarget 832 } 833 834 ok, err = vS.inodeCacheInsertWhileLocked(inode) 835 if nil != err { 836 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,,) failed to insert inode in inodeCache: %v", inodeNumber, err) 837 panic(err) 838 } 839 if !ok { 840 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,inodeType==DirType,,,,,,) insert of inode in inodeCache got a !ok", inodeNumber) 841 panic(err) 842 } 843 844 _ = inodeRWLock.Unlock() 845 846 vS.Unlock() 847 848 err 
= vS.flushInode(inode) 849 if nil != err { 850 err = fmt.Errorf("PatchInode(inodeNumber==0x%016X,,,,,,,) failed to flush: %v", inodeNumber, err) 851 panic(err) 852 } 853 854 return 855 } 856 857 func (inMemoryInode *inMemoryInodeStruct) convertToOnDiskInodeV1() (onDiskInodeV1 *onDiskInodeV1Struct, err error) { 858 onDiskInode := inMemoryInode.onDiskInodeV1Struct 859 860 if (DirType == inMemoryInode.InodeType) || (FileType == inMemoryInode.InodeType) { 861 content := inMemoryInode.payload.(sortedmap.BPlusTree) 862 payloadObjectNumber, payloadObjectOffset, payloadObjectLength, flushErr := content.Flush(false) 863 if nil != flushErr { 864 panic(flushErr) 865 } 866 pruneErr := content.Prune() 867 if nil != pruneErr { 868 panic(pruneErr) 869 } 870 if onDiskInodeV1PayloadObjectOffset != payloadObjectOffset { 871 flushErr = fmt.Errorf("Logic Error: content.Flush() should have returned payloadObjectOffset == %v", onDiskInodeV1PayloadObjectOffset) 872 panic(flushErr) 873 } 874 onDiskInode.PayloadObjectNumber = payloadObjectNumber 875 onDiskInode.PayloadObjectLength = payloadObjectLength 876 } 877 878 // maps are refernce types, so this needs to be copied manually 879 880 onDiskInode.StreamMap = make(map[string][]byte) 881 for key, value := range inMemoryInode.StreamMap { 882 valueCopy := make([]byte, len(value)) 883 copy(valueCopy, value) 884 onDiskInode.StreamMap[key] = valueCopy 885 } 886 887 onDiskInode.LogSegmentMap = make(map[uint64]uint64) 888 for logSegmentNumber, logSegmentBytesUsed := range inMemoryInode.LogSegmentMap { 889 onDiskInode.LogSegmentMap[logSegmentNumber] = logSegmentBytesUsed 890 } 891 892 return &onDiskInode, nil 893 } 894 895 func (vS *volumeStruct) flushInode(inode *inMemoryInodeStruct) (err error) { 896 err = vS.flushInodes([]*inMemoryInodeStruct{inode}) 897 return 898 } 899 900 func (vS *volumeStruct) flushInodeNumber(inodeNumber InodeNumber) (err error) { 901 err = vS.flushInodeNumbers([]InodeNumber{inodeNumber}) 902 return 903 } 904 905 // 
REVIEW: Need to clearly explain what "flush" means (i.e. "to HH", not "to disk") 906 907 func (vS *volumeStruct) flushInodes(inodes []*inMemoryInodeStruct) (err error) { 908 var ( 909 dirtyInodeNumbers []uint64 910 dirtyInodeRecBytes []byte 911 dirtyInodeRecs [][]byte 912 emptyLogSegments []uint64 913 emptyLogSegmentsThisInode []uint64 914 inode *inMemoryInodeStruct 915 logSegmentNumber uint64 916 logSegmentValidBytes uint64 917 onDiskInodeV1 *onDiskInodeV1Struct 918 onDiskInodeV1Buf []byte 919 payloadAsBPlusTree sortedmap.BPlusTree 920 payloadObjectLength uint64 921 payloadObjectNumber uint64 922 toFlushInodeNumbers []uint64 923 ) 924 925 halter.Trigger(halter.InodeFlushInodesEntry) 926 defer halter.Trigger(halter.InodeFlushInodesExit) 927 928 toFlushInodeNumbers = make([]uint64, 0, len(inodes)) 929 for _, inode = range inodes { 930 toFlushInodeNumbers = append(toFlushInodeNumbers, uint64(inode.InodeNumber)) 931 } 932 933 evtlog.Record(evtlog.FormatFlushInodesEntry, vS.volumeName, toFlushInodeNumbers) 934 935 // Assemble slice of "dirty" inodes while flushing them 936 dirtyInodeNumbers = make([]uint64, 0, len(inodes)) 937 dirtyInodeRecs = make([][]byte, 0, len(inodes)) 938 emptyLogSegments = make([]uint64, 0) 939 940 for _, inode = range inodes { 941 if FileType == inode.InodeType { 942 err = vS.doFileInodeDataFlush(inode) 943 if nil != err { 944 evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(inode.InodeNumber), err.Error()) 945 logger.ErrorWithError(err) 946 err = blunder.AddError(err, blunder.InodeFlushError) 947 return 948 } 949 emptyLogSegmentsThisInode = make([]uint64, 0) 950 for logSegmentNumber, logSegmentValidBytes = range inode.LogSegmentMap { 951 if 0 == logSegmentValidBytes { 952 emptyLogSegmentsThisInode = append(emptyLogSegmentsThisInode, logSegmentNumber) 953 } 954 } 955 for _, logSegmentNumber = range emptyLogSegmentsThisInode { 956 delete(inode.LogSegmentMap, logSegmentNumber) 957 } 958 emptyLogSegments = 
append(emptyLogSegments, emptyLogSegmentsThisInode...) 959 } 960 if SymlinkType != inode.InodeType { 961 // (FileType == inode.InodeType || (DirType == inode.InodeType) 962 payloadAsBPlusTree = inode.payload.(sortedmap.BPlusTree) 963 payloadObjectNumber, _, payloadObjectLength, err = payloadAsBPlusTree.Flush(false) 964 if nil != err { 965 evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(inode.InodeNumber), err.Error()) 966 logger.ErrorWithError(err) 967 err = blunder.AddError(err, blunder.InodeFlushError) 968 return 969 } 970 if payloadObjectNumber > inode.PayloadObjectNumber { 971 if !inode.dirty { 972 err = fmt.Errorf("Logic error: inode.dirty should have been true") 973 evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(inode.InodeNumber), err.Error()) 974 logger.ErrorWithError(err) 975 err = blunder.AddError(err, blunder.InodeFlushError) 976 return 977 } 978 // REVIEW: What if cache pressure flushed before we got here? 979 // Is it possible that Number doesn't get updated? 
980 981 if inode.PayloadObjectNumber != 0 { 982 logger.Tracef("flushInodes(): volume '%s' %v inode %d: updating Payload"+ 983 " from Object %016X to %016X bytes %d to %d", 984 vS.volumeName, inode.InodeType, inode.InodeNumber, 985 inode.PayloadObjectNumber, payloadObjectNumber, 986 inode.PayloadObjectLength, payloadObjectLength) 987 } 988 inode.PayloadObjectNumber = payloadObjectNumber 989 inode.PayloadObjectLength = payloadObjectLength 990 991 evtlog.Record(evtlog.FormatFlushInodesDirOrFilePayloadObjectNumberUpdated, vS.volumeName, uint64(inode.InodeNumber), payloadObjectNumber) 992 } 993 } 994 if inode.dirty { 995 onDiskInodeV1, err = inode.convertToOnDiskInodeV1() 996 if nil != err { 997 evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(inode.InodeNumber), err.Error()) 998 logger.ErrorWithError(err) 999 err = blunder.AddError(err, blunder.InodeFlushError) 1000 return 1001 } 1002 onDiskInodeV1Buf, err = json.Marshal(onDiskInodeV1) 1003 if nil != err { 1004 evtlog.Record(evtlog.FormatFlushInodesErrorOnInode, vS.volumeName, uint64(inode.InodeNumber), err.Error()) 1005 logger.ErrorWithError(err) 1006 err = blunder.AddError(err, blunder.InodeFlushError) 1007 return 1008 } 1009 dirtyInodeRecBytes = make([]byte, 0, len(globals.inodeRecDefaultPreambleBuf)+len(onDiskInodeV1Buf)) 1010 dirtyInodeRecBytes = append(dirtyInodeRecBytes, globals.inodeRecDefaultPreambleBuf...) 1011 dirtyInodeRecBytes = append(dirtyInodeRecBytes, onDiskInodeV1Buf...) 
1012 dirtyInodeNumbers = append(dirtyInodeNumbers, uint64(inode.InodeNumber)) 1013 dirtyInodeRecs = append(dirtyInodeRecs, dirtyInodeRecBytes) 1014 } 1015 } 1016 1017 // Go update HeadHunter (if necessary) 1018 if 0 < len(dirtyInodeNumbers) { 1019 err = vS.headhunterVolumeHandle.PutInodeRecs(dirtyInodeNumbers, dirtyInodeRecs) 1020 if nil != err { 1021 evtlog.Record(evtlog.FormatFlushInodesErrorOnHeadhunterPut, vS.volumeName, err.Error()) 1022 logger.ErrorWithError(err) 1023 err = blunder.AddError(err, blunder.InodeFlushError) 1024 return 1025 } 1026 for _, inode = range inodes { 1027 inode.dirty = false 1028 } 1029 } 1030 1031 // Now do phase one of garbage collection 1032 if 0 < len(emptyLogSegments) { 1033 for _, logSegmentNumber = range emptyLogSegments { 1034 err = vS.headhunterVolumeHandle.DeleteLogSegmentRec(logSegmentNumber) 1035 if nil != err { 1036 logger.WarnfWithError(err, "couldn't delete garbage log segment") 1037 } 1038 } 1039 } 1040 1041 evtlog.Record(evtlog.FormatFlushInodesExit, vS.volumeName, toFlushInodeNumbers) 1042 1043 err = nil 1044 return 1045 } 1046 1047 func (vS *volumeStruct) flushInodeNumbers(inodeNumbers []InodeNumber) (err error) { 1048 var ( 1049 inode *inMemoryInodeStruct 1050 inodes []*inMemoryInodeStruct 1051 inodeNumber InodeNumber 1052 ok bool 1053 ) 1054 1055 // Fetch referenced inodes 1056 inodes = make([]*inMemoryInodeStruct, 0, len(inodeNumbers)) 1057 for _, inodeNumber = range inodeNumbers { 1058 inode, ok, err = vS.fetchInode(inodeNumber) 1059 if nil != err { 1060 // the inode is locked so this should never happen (unless the inode 1061 // was evicted from the cache and it was corrupt when read from disk) 1062 // (err includes volume name and inode number) 1063 logger.ErrorfWithError(err, "%s: fetch of inode to flush failed", utils.GetFnName()) 1064 err = blunder.AddError(err, blunder.InodeFlushError) 1065 return 1066 } 1067 if !ok { 1068 // this should never happen (see above) 1069 err = fmt.Errorf("%s: fetch of inode %d 
volume '%s' failed because it is unallocated", 1070 utils.GetFnName(), inodeNumber, vS.volumeName) 1071 logger.ErrorWithError(err) 1072 err = blunder.AddError(err, blunder.NotFoundError) 1073 return 1074 } 1075 1076 inodes = append(inodes, inode) 1077 } 1078 1079 err = vS.flushInodes(inodes) 1080 1081 return 1082 } 1083 1084 func accountNameToVolumeName(accountName string) (volumeName string, ok bool) { 1085 var ( 1086 volume *volumeStruct 1087 ) 1088 1089 globals.Lock() 1090 1091 volume, ok = globals.accountMap[accountName] 1092 if ok { 1093 volumeName = volume.volumeName 1094 } 1095 1096 globals.Unlock() 1097 1098 return 1099 } 1100 1101 func volumeNameToAccountName(volumeName string) (accountName string, ok bool) { 1102 var ( 1103 volume *volumeStruct 1104 ) 1105 1106 globals.Lock() 1107 1108 volume, ok = globals.volumeMap[volumeName] 1109 if ok { 1110 accountName = volume.accountName 1111 } 1112 1113 globals.Unlock() 1114 1115 return 1116 } 1117 1118 func volumeNameToActivePeerPrivateIPAddr(volumeName string) (activePeerPrivateIPAddr string, ok bool) { 1119 var ( 1120 volume *volumeStruct 1121 ) 1122 1123 globals.Lock() 1124 1125 volume, ok = globals.volumeMap[volumeName] 1126 1127 if ok { 1128 activePeerPrivateIPAddr = volume.volumeGroup.activePeerPrivateIPAddr 1129 } 1130 1131 globals.Unlock() 1132 1133 return 1134 } 1135 1136 func fetchVolumeHandle(volumeName string) (volumeHandle VolumeHandle, err error) { 1137 globals.Lock() 1138 volume, ok := globals.volumeMap[volumeName] 1139 globals.Unlock() 1140 1141 if !ok { 1142 err = fmt.Errorf("%s: volumeName \"%v\" not found", utils.GetFnName(), volumeName) 1143 err = blunder.AddError(err, blunder.NotFoundError) 1144 return 1145 } 1146 1147 volumeHandle = volume 1148 1149 volume.Lock() // REVIEW: Once Tracker https://www.pivotaltracker.com/story/show/133377567 1150 defer volume.Unlock() // is resolved, these two lines should be removed 1151 1152 if !volume.served { 1153 err = fmt.Errorf("%s: volumeName \"%v\" not 
served", utils.GetFnName(), volumeName) 1154 err = blunder.AddError(err, blunder.NotActiveError) 1155 return 1156 } 1157 1158 _, ok, err = volume.headhunterVolumeHandle.GetInodeRec(uint64(RootDirInodeNumber)) 1159 if nil != err { 1160 // disk corruption of the inode btree (or software error) 1161 err = fmt.Errorf("%s: unable to lookup root inode for volume '%s': %v", 1162 utils.GetFnName(), volume.volumeName, err) 1163 err = blunder.AddError(err, blunder.NotFoundError) 1164 } 1165 if !ok { 1166 // First access didn't find root dir... so create it 1167 _, err = volume.createRootOrSubDir(PosixModePerm, 0, 0, true) 1168 if nil != err { 1169 err = fmt.Errorf("%s: unable to create root inode for volume '%s': %v", 1170 utils.GetFnName(), volume.volumeName, err) 1171 err = blunder.AddError(err, blunder.NotFoundError) 1172 } 1173 } 1174 1175 // If we get this far, return values are already set as desired 1176 1177 err = nil 1178 1179 return 1180 } 1181 1182 func (vS *volumeStruct) provisionPhysicalContainer(physicalContainerLayout *physicalContainerLayoutStruct) (err error) { 1183 if 0 == (physicalContainerLayout.containerNameSliceLoopCount % physicalContainerLayout.maxObjectsPerContainer) { 1184 // We need to provision a new PhysicalContainer in this PhysicalContainerLayout 1185 1186 physicalContainerNameSuffix := vS.headhunterVolumeHandle.FetchNonce() 1187 1188 newContainerName := fmt.Sprintf("%s%s", physicalContainerLayout.containerNamePrefix, utils.Uint64ToHexStr(physicalContainerNameSuffix)) 1189 1190 storagePolicyHeaderValues := []string{vS.defaultPhysicalContainerLayout.containerStoragePolicy} 1191 newContainerHeaders := make(map[string][]string) 1192 newContainerHeaders["X-Storage-Policy"] = storagePolicyHeaderValues 1193 1194 err = swiftclient.ContainerPut(vS.accountName, newContainerName, newContainerHeaders) 1195 if nil != err { 1196 return 1197 } 1198 1199 physicalContainerLayout.containerNameSlice[physicalContainerLayout.containerNameSliceNextIndex] = 
newContainerName 1200 } 1201 1202 err = nil 1203 return 1204 } 1205 1206 func (vS *volumeStruct) provisionObject() (containerName string, objectNumber uint64, err error) { 1207 objectNumber = vS.headhunterVolumeHandle.FetchNonce() 1208 1209 vS.Lock() 1210 1211 err = vS.provisionPhysicalContainer(vS.defaultPhysicalContainerLayout) 1212 if nil != err { 1213 vS.Unlock() 1214 return 1215 } 1216 1217 containerName = vS.defaultPhysicalContainerLayout.containerNameSlice[vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex] 1218 1219 vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex++ 1220 1221 if vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex == vS.defaultPhysicalContainerLayout.containersPerPeer { 1222 vS.defaultPhysicalContainerLayout.containerNameSliceNextIndex = 0 1223 vS.defaultPhysicalContainerLayout.containerNameSliceLoopCount++ 1224 } 1225 1226 vS.Unlock() 1227 1228 err = nil 1229 return 1230 } 1231 1232 func (vS *volumeStruct) Access(inodeNumber InodeNumber, userID InodeUserID, groupID InodeGroupID, otherGroupIDs []InodeGroupID, accessMode InodeMode, override AccessOverride) (accessReturn bool) { 1233 var ( 1234 adjustedInodeNumber InodeNumber 1235 err error 1236 groupIDCheck bool 1237 ok bool 1238 otherGroupID InodeGroupID 1239 ourInode *inMemoryInodeStruct 1240 ourInodeGroupID InodeGroupID 1241 ourInodeMode InodeMode 1242 ourInodeUserID InodeUserID 1243 snapShotIDType headhunter.SnapShotIDType 1244 ) 1245 1246 snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 1247 1248 switch snapShotIDType { 1249 case headhunter.SnapShotIDTypeLive: 1250 adjustedInodeNumber = inodeNumber 1251 case headhunter.SnapShotIDTypeSnapShot: 1252 adjustedInodeNumber = inodeNumber 1253 case headhunter.SnapShotIDTypeDotSnapShot: 1254 adjustedInodeNumber = RootDirInodeNumber 1255 default: 1256 logger.Fatalf("headhunter.SnapShotU64Decode(inodeNumber == 0x%016X) returned unknown snapShotIDType: %v", inodeNumber, 
snapShotIDType) 1257 } 1258 if (headhunter.SnapShotIDTypeLive != snapShotIDType) && (0 != (W_OK & accessMode)) { 1259 err = blunder.NewError(blunder.InvalidArgError, "Access() where accessMode includes W_OK of non-LiveView inodeNumber not allowed") 1260 return 1261 } 1262 1263 ourInode, ok, err = vS.fetchInode(adjustedInodeNumber) 1264 if nil != err { 1265 // this indicates disk corruption or software bug 1266 // (err includes volume name and inode number) 1267 logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName()) 1268 1269 // if we can't fetch the inode we can't access it 1270 accessReturn = false 1271 return 1272 } 1273 if !ok { 1274 // disk corruption or client requested a free inode 1275 logger.Infof("%s: fetch of inode %d volume '%s' failed because it is unallocated", 1276 utils.GetFnName(), inodeNumber, vS.volumeName) 1277 1278 // if the inode is free then we can't access it 1279 accessReturn = false 1280 return 1281 } 1282 1283 ourInodeUserID = ourInode.UserID 1284 ourInodeGroupID = ourInode.GroupID 1285 1286 if headhunter.SnapShotIDTypeLive == snapShotIDType { 1287 ourInodeMode = ourInode.Mode 1288 } else { 1289 ourInodeMode = ourInode.Mode // TODO: Make it read-only... 1290 } 1291 1292 if F_OK == accessMode { 1293 // the inode exists so its F_OK 1294 accessReturn = true 1295 return 1296 } 1297 1298 if P_OK == accessMode { 1299 accessReturn = (InodeRootUserID == userID) || (userID == ourInodeUserID) 1300 return 1301 } 1302 1303 if accessMode != (accessMode & (R_OK | W_OK | X_OK)) { 1304 // Default to false if P_OK bit set along with any others) 1305 accessReturn = false 1306 return 1307 } 1308 1309 // Only the LiveView is ever writeable... 
even by the root user 1310 if (accessMode&W_OK != 0) && (headhunter.SnapShotIDTypeLive != snapShotIDType) { 1311 accessReturn = false 1312 return 1313 } 1314 1315 // The root user (if not squashed) can do anything except exec files 1316 // that are not executable by any user 1317 if userID == InodeRootUserID { 1318 if (accessMode&X_OK != 0) && (ourInodeMode&(X_OK<<6|X_OK<<3|X_OK) == 0) { 1319 accessReturn = false 1320 } else { 1321 accessReturn = true 1322 } 1323 return 1324 } 1325 1326 // We check against permissions for the user, group, and other. The 1327 // first match wins (not the first permission granted). If the user is 1328 // the owner of the file then those permission bits determine what 1329 // happens. In other words, if the permission bits deny read permission 1330 // to the owner of a file but allow read permission for group and other, 1331 // then everyone except the owner of the file can read it. 1332 // 1333 // On a local file system, the owner of a file is *not* allowed to write 1334 // to the file unless it was opened for writing and the permission bits 1335 // allowed it *or* the process created the file and opened it for 1336 // writing at the same time. However, NFS does not have an open state 1337 // (there's no file descriptor that tracks permissions when the the file 1338 // was opened) so we check for write permission on every write. This 1339 // breaks things like tar when it tries to unpack a file which has 1340 // permission 0444 (read only). On a local file system that works, but 1341 // it doesn't work for NFS unless we bend the rules a bit for the owner 1342 // of the file and allow the owner to write to the file even if 1343 // appropriate permissions are lacking. (This is only done for the user 1344 // that owns the file, not the group that owns the file. Note that the 1345 // owner can always change the permissions to allow writing so its not a 1346 // security risk, but the owning group cannot). 
1347 // 1348 // Note that the NFS client will typically call Access() when an app 1349 // wants to open the file and fail an open request for writing that if 1350 // the permission bits do not allow it. 1351 // 1352 // Similar rules apply to Read() and Truncate() (for ftruncate(2)), but 1353 // not for execute permission. Also, this only applies to regular files 1354 // but we'll rely on the caller for that. 1355 if userID == ourInodeUserID { 1356 if override == OwnerOverride && (accessMode&X_OK == 0) { 1357 accessReturn = true 1358 } else { 1359 accessReturn = (((ourInodeMode >> 6) & accessMode) == accessMode) 1360 } 1361 return 1362 } 1363 1364 groupIDCheck = (groupID == ourInodeGroupID) 1365 if !groupIDCheck { 1366 for _, otherGroupID = range otherGroupIDs { 1367 if otherGroupID == ourInodeGroupID { 1368 groupIDCheck = true 1369 break 1370 } 1371 } 1372 } 1373 if groupIDCheck { 1374 accessReturn = ((((ourInodeMode >> 3) & 07) & accessMode) == accessMode) 1375 return 1376 } 1377 1378 accessReturn = ((((ourInodeMode >> 0) & 07) & accessMode) == accessMode) 1379 return 1380 } 1381 1382 func (vS *volumeStruct) ProvisionObject() (objectPath string, err error) { 1383 err = enforceRWMode(true) 1384 if nil != err { 1385 return 1386 } 1387 1388 containerName, objectNumber, err := vS.provisionObject() 1389 if nil != err { 1390 return 1391 } 1392 1393 objectPath = fmt.Sprintf("/v1/%s/%s/%016X", vS.accountName, containerName, objectNumber) 1394 1395 err = nil 1396 return 1397 } 1398 1399 func (vS *volumeStruct) Purge(inodeNumber InodeNumber) (err error) { 1400 var ( 1401 inode *inMemoryInodeStruct 1402 ok bool 1403 ) 1404 1405 err = enforceRWMode(false) 1406 if nil != err { 1407 return 1408 } 1409 1410 inode, ok, err = vS.inodeCacheFetch(inodeNumber) 1411 if (nil != err) || !ok { 1412 return 1413 } 1414 1415 if inode.dirty { 1416 err = fmt.Errorf("Inode dirty... 
cannot be purged") 1417 return 1418 } 1419 1420 ok, err = vS.inodeCacheDrop(inode) 1421 if nil != err { 1422 return 1423 } 1424 if !ok { 1425 err = fmt.Errorf("inodeCacheDrop(inode) failed") 1426 } 1427 1428 return 1429 } 1430 1431 func (vS *volumeStruct) Destroy(inodeNumber InodeNumber) (err error) { 1432 logger.Tracef("inode.Destroy(): volume '%s' inode %d", vS.volumeName, inodeNumber) 1433 1434 err = enforceRWMode(false) 1435 if nil != err { 1436 return 1437 } 1438 1439 snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 1440 if headhunter.SnapShotIDTypeLive != snapShotIDType { 1441 err = fmt.Errorf("Destroy() on non-LiveView inodeNumber not allowed") 1442 return 1443 } 1444 1445 ourInode, ok, err := vS.fetchInode(inodeNumber) 1446 if nil != err { 1447 // the inode is locked so this should never happen (unless the inode 1448 // was evicted from the cache and it was corrupt when read from disk) 1449 // (err includes volume name and inode number) 1450 logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName()) 1451 return 1452 } 1453 if !ok { 1454 // this should never happen (see above) 1455 err = fmt.Errorf("%s: cannot destroy inode %d volume '%s' because it is unallocated", 1456 utils.GetFnName(), inodeNumber, vS.volumeName) 1457 err = blunder.AddError(err, blunder.NotFoundError) 1458 logger.ErrorWithError(err) 1459 return 1460 } 1461 1462 ok, err = vS.inodeCacheDrop(ourInode) 1463 if nil != err { 1464 logger.ErrorfWithError(err, "%s: inodeCacheDrop() of inode failed: %v", utils.GetFnName(), err) 1465 return 1466 } 1467 if !ok { 1468 logger.ErrorfWithError(err, "%s: inodeCacheDrop() of inode returned !ok", utils.GetFnName()) 1469 return 1470 } 1471 1472 if ourInode.InodeType == FileType { 1473 _ = vS.doFileInodeDataFlush(ourInode) 1474 } 1475 1476 err = vS.headhunterVolumeHandle.DeleteInodeRec(uint64(inodeNumber)) 1477 if nil != err { 1478 logger.ErrorWithError(err) 1479 return 1480 } 1481 1482 if DirType == 
ourInode.InodeType { 1483 logger.Tracef("inode.Destroy(): volume '%s' inode %d: discarding dirmap payload Object %016X len %d", 1484 vS.volumeName, inodeNumber, ourInode.PayloadObjectNumber, ourInode.PayloadObjectLength) 1485 1486 dirMapping := ourInode.payload.(sortedmap.BPlusTree) 1487 1488 err = dirMapping.Discard() 1489 if nil != err { 1490 logger.ErrorWithError(err) 1491 return 1492 } 1493 1494 stats.IncrementOperations(&stats.DirDestroyOps) 1495 1496 } else if FileType == ourInode.InodeType { 1497 logger.Tracef("inode.Destroy(): volume '%s' inode %d: discarding extmap payload Object %016X len %d", 1498 vS.volumeName, inodeNumber, ourInode.PayloadObjectNumber, ourInode.PayloadObjectLength) 1499 1500 extents := ourInode.payload.(sortedmap.BPlusTree) 1501 1502 err = extents.Discard() 1503 if nil != err { 1504 logger.ErrorWithError(err) 1505 return 1506 } 1507 1508 for logSegmentNumber := range ourInode.LogSegmentMap { 1509 deleteSegmentErr := vS.headhunterVolumeHandle.DeleteLogSegmentRec(logSegmentNumber) 1510 if nil != deleteSegmentErr { 1511 logger.WarnfWithError(deleteSegmentErr, "couldn't delete destroy'd log segment") 1512 return 1513 } 1514 stats.IncrementOperations(&stats.GcLogSegDeleteOps) 1515 } 1516 stats.IncrementOperations(&stats.GcLogSegOps) 1517 1518 stats.IncrementOperations(&stats.FileDestroyOps) 1519 } else { // SymlinkType == ourInode.InodeType 1520 stats.IncrementOperations(&stats.SymlinkDestroyOps) 1521 } 1522 1523 return 1524 } 1525 1526 func (vS *volumeStruct) GetMetadata(inodeNumber InodeNumber) (metadata *MetadataStruct, err error) { 1527 var ( 1528 inode *inMemoryInodeStruct 1529 ok bool 1530 pos int 1531 snapShotIDType headhunter.SnapShotIDType 1532 ) 1533 1534 snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 1535 if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType { 1536 // For /<SnapShotDirName>, start with metadata from / 1537 inode, ok, err = vS.fetchInode(RootDirInodeNumber) 1538 } else 
{ 1539 inode, ok, err = vS.fetchInode(inodeNumber) 1540 } 1541 1542 if nil != err { 1543 // this indicates disk corruption or software error 1544 // (err includes volume name and inode number) 1545 logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName()) 1546 return 1547 } 1548 if !ok { 1549 // disk corruption or client request for unallocated inode 1550 err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated", 1551 utils.GetFnName(), inodeNumber, vS.volumeName) 1552 err = blunder.AddError(err, blunder.NotFoundError) 1553 logger.InfoWithError(err) 1554 return 1555 } 1556 1557 metadata = &MetadataStruct{ 1558 InodeType: inode.InodeType, 1559 LinkCount: inode.LinkCount, 1560 Size: inode.Size, 1561 CreationTime: inode.CreationTime, 1562 ModificationTime: inode.ModificationTime, 1563 AccessTime: inode.AccessTime, 1564 AttrChangeTime: inode.AttrChangeTime, 1565 NumWrites: inode.NumWrites, 1566 InodeStreamNameSlice: make([]string, len(inode.StreamMap)), 1567 Mode: inode.Mode, 1568 UserID: inode.UserID, 1569 GroupID: inode.GroupID, 1570 } 1571 1572 if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType { 1573 // For /<SnapShotDirName>, simply remove Write Access... 
and skip InodeStreamNameSlice 1574 metadata.Mode &= metadata.Mode & ^(W_OK<<6 | W_OK<<3 | W_OK<<0) 1575 } else { 1576 if headhunter.SnapShotIDTypeSnapShot == snapShotIDType { 1577 // For inodes in a SnapShot, simply remove Write Access 1578 metadata.Mode &= metadata.Mode & ^(W_OK<<6 | W_OK<<3 | W_OK<<0) 1579 } 1580 pos = 0 1581 for inodeStreamName := range inode.StreamMap { 1582 metadata.InodeStreamNameSlice[pos] = inodeStreamName 1583 pos++ 1584 } 1585 } 1586 1587 stats.IncrementOperations(&stats.InodeGetMetadataOps) 1588 return 1589 } 1590 1591 func (vS *volumeStruct) GetType(inodeNumber InodeNumber) (inodeType InodeType, err error) { 1592 snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 1593 if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType { 1594 inodeType = DirType 1595 err = nil 1596 return 1597 } 1598 1599 inode, ok, err := vS.fetchInode(inodeNumber) 1600 if nil != err { 1601 // this indicates disk corruption or software error 1602 // (err includes volume name and inode number) 1603 logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName()) 1604 return 1605 } 1606 if !ok { 1607 // disk corruption or client request for unallocated inode 1608 err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated", 1609 utils.GetFnName(), inodeNumber, vS.volumeName) 1610 logger.InfoWithError(err) 1611 err = blunder.AddError(err, blunder.NotFoundError) 1612 return 1613 } 1614 1615 inodeType = inode.InodeType 1616 1617 stats.IncrementOperations(&stats.InodeGetTypeOps) 1618 return 1619 } 1620 1621 func (vS *volumeStruct) GetLinkCount(inodeNumber InodeNumber) (linkCount uint64, err error) { 1622 var ( 1623 adjustLinkCountForSnapShotSubDirInRootDirInode bool 1624 inode *inMemoryInodeStruct 1625 ok bool 1626 snapShotCount uint64 1627 snapShotIDType headhunter.SnapShotIDType 1628 ) 1629 1630 if RootDirInodeNumber == inodeNumber { 1631 // Account for .. 
in /<SnapShotDirName> if any SnapShot's exist 1632 snapShotCount = vS.headhunterVolumeHandle.SnapShotCount() 1633 adjustLinkCountForSnapShotSubDirInRootDirInode = (0 != snapShotCount) 1634 } else { 1635 snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 1636 if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType { 1637 // linkCount == 1 (/<SnapShotDirName>'s '.') + 1 (/'s reference to <SnapShotDirName>) + # SnapShot's (/..' in each SnapShot's /) 1638 snapShotCount = vS.headhunterVolumeHandle.SnapShotCount() 1639 linkCount = 1 + 1 + snapShotCount 1640 err = nil 1641 return 1642 } 1643 adjustLinkCountForSnapShotSubDirInRootDirInode = false 1644 } 1645 1646 inode, ok, err = vS.fetchInode(inodeNumber) 1647 if nil != err { 1648 // this indicates disk corruption or software error 1649 // (err includes volume name and inode number) 1650 logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName()) 1651 return 1652 } 1653 if !ok { 1654 // disk corruption or client request for unallocated inode 1655 err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated", 1656 utils.GetFnName(), inodeNumber, vS.volumeName) 1657 logger.InfoWithError(err) 1658 err = blunder.AddError(err, blunder.NotFoundError) 1659 return 1660 } 1661 1662 if adjustLinkCountForSnapShotSubDirInRootDirInode { 1663 linkCount = inode.LinkCount + 1 1664 } else { 1665 linkCount = inode.LinkCount 1666 } 1667 1668 return 1669 } 1670 1671 // SetLinkCount is used to adjust the LinkCount property to match current reference count during FSCK TreeWalk. 
func (vS *volumeStruct) SetLinkCount(inodeNumber InodeNumber, linkCount uint64) (err error) {
	err = enforceRWMode(false)
	if nil != err {
		return
	}

	// only LiveView inodes may be mutated
	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("SetLinkCount() on non-LiveView inodeNumber not allowed")
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// this indicates disk corruption or software error
		// (err includes volume name and inode number)
		logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName())
		return
	}
	if !ok {
		// disk corruption or client request for unallocated inode
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.InfoWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return
	}

	// mark dirty then flush so the change is persisted to HeadHunter
	inode.dirty = true
	inode.LinkCount = linkCount

	err = vS.flushInode(inode)
	if err != nil {
		logger.ErrorWithError(err)
		return err
	}

	return
}

// SetCreationTime sets the inode's CreationTime (and bumps AttrChangeTime),
// persisting the change; only allowed on LiveView inodes.
func (vS *volumeStruct) SetCreationTime(inodeNumber InodeNumber, CreationTime time.Time) (err error) {
	err = enforceRWMode(false)
	if nil != err {
		return
	}

	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("SetCreationTime() on non-LiveView inodeNumber not allowed")
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// the inode is locked so this should never happen (unless the inode
		// was evicted from the cache and it was corrupt when read from disk)
		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
		return err
	}
	if !ok {
		// this should never happen (see above)
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return err
	}

	inode.dirty = true
	inode.AttrChangeTime = time.Now()
	inode.CreationTime = CreationTime

	err = vS.flushInode(inode)
	if err != nil {
		logger.ErrorWithError(err)
		return err
	}
	return
}

// SetModificationTime sets the inode's ModificationTime (and bumps
// AttrChangeTime), persisting the change; only allowed on LiveView inodes.
func (vS *volumeStruct) SetModificationTime(inodeNumber InodeNumber, ModificationTime time.Time) (err error) {
	err = enforceRWMode(false)
	if nil != err {
		return
	}

	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("SetModificationTime() on non-LiveView inodeNumber not allowed")
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// the inode is locked so this should never happen (unless the inode
		// was evicted from the cache and it was corrupt when read from disk)
		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
		return err
	}
	if !ok {
		// this should never happen (see above)
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return err
	}

	inode.dirty = true
	inode.AttrChangeTime = time.Now()
	inode.ModificationTime = ModificationTime

	err = vS.flushInode(inode)
	if err != nil {
		logger.ErrorWithError(err)
		return err
	}

	return
}

// SetAccessTime sets the inode's AccessTime (and bumps AttrChangeTime),
// persisting the change; only allowed on LiveView inodes.
func (vS *volumeStruct) SetAccessTime(inodeNumber InodeNumber, accessTime time.Time) (err error) {
	err = enforceRWMode(false)
	if nil != err {
		return
	}

	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("SetAccessTime() on non-LiveView inodeNumber not allowed")
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// the inode is locked so this should never happen (unless the inode
		// was evicted from the cache and it was corrupt when read from disk)
		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
		return err
	}
	if !ok {
		// this should never happen (see above)
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return err
	}

	inode.dirty = true
	inode.AttrChangeTime = time.Now()
	inode.AccessTime = accessTime

	err = vS.flushInode(inode)
	if err != nil {
		logger.ErrorWithError(err)
		return err
	}

	return
}

// determineMode combines the permission bits of filePerm with the file-type
// bits implied by inodeType, silently masking off any non-permission bits
// the caller supplied (samba is known to pass file-type bits here).
func determineMode(filePerm InodeMode, inodeType InodeType) (fileMode InodeMode, err error) {
	// Caller should only be setting the file perm bits, but samba seems to send file type
	// bits as well. Since we need to work with whatever samba does, let's just silently
	// mask off the other bits.
	if filePerm&^PosixModePerm != 0 {
		logger.Tracef("inode.determineMode(): invalid file mode 0x%x (max 0x%x); removing file type bits.", uint32(filePerm), uint32(PosixModePerm))
	}

	// Build fileMode starting with the file permission bits
	fileMode = filePerm & PosixModePerm

	// Add the file type to the mode.
	switch inodeType {
	case DirType:
		fileMode |= PosixModeDir
	case FileType:
		fileMode |= PosixModeFile
	case SymlinkType:
		fileMode |= PosixModeSymlink
	default:
		err = fmt.Errorf("%s: unrecognized inode type %v", utils.GetFnName(), inodeType)
		err = blunder.AddError(err, blunder.InvalidInodeTypeError)
		return
	}

	err = nil
	return
}

// SetPermMode replaces the inode's permission bits (preserving its file-type
// bits via determineMode) and bumps AttrChangeTime, persisting the change;
// only allowed on LiveView inodes.
func (vS *volumeStruct) SetPermMode(inodeNumber InodeNumber, filePerm InodeMode) (err error) {
	err = enforceRWMode(false)
	if nil != err {
		return
	}

	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("SetPermMode() on non-LiveView inodeNumber not allowed")
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// the inode is locked so this should never happen (unless the inode
		// was evicted from the cache and it was corrupt when read from disk)
		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
		return err
	}
	if !ok {
		// this should never happen (see above)
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return err
	}

	// Create file mode out of file permissions plus inode type
	fileMode, err := determineMode(filePerm, inode.InodeType)
	if err != nil {
		return err
	}

	inode.dirty = true
	inode.Mode = fileMode

	updateTime := time.Now()
	inode.AttrChangeTime = updateTime

	err = vS.flushInode(inode)
	if err != nil {
		logger.ErrorWithError(err)
		return err
	}

	return
}

// SetOwnerUserID sets the inode's owning UserID and bumps AttrChangeTime,
// persisting the change; only allowed on LiveView inodes.
func (vS *volumeStruct) SetOwnerUserID(inodeNumber InodeNumber, userID InodeUserID) (err error) {
	err = enforceRWMode(false)
	if nil != err {
		return
	}

	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("SetOwnerUserID() on non-LiveView inodeNumber not allowed")
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// the inode is locked so this should never happen (unless the inode
		// was evicted from the cache and it was corrupt when read from disk)
		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
		return err
	}
	if !ok {
		// this should never happen (see above)
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return err
	}

	inode.dirty = true
	inode.UserID = userID

	updateTime := time.Now()
	inode.AttrChangeTime = updateTime

	err = vS.flushInode(inode)
	if err != nil {
		logger.ErrorWithError(err)
		return err
	}

	return
}

// SetOwnerUserIDGroupID sets both the owning UserID and GroupID in a single
// flush (one AttrChangeTime bump); only allowed on LiveView inodes.
func (vS *volumeStruct) SetOwnerUserIDGroupID(inodeNumber InodeNumber, userID InodeUserID, groupID InodeGroupID) (err error) {
	err = enforceRWMode(false)
	if nil != err {
		return
	}

	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("SetOwnerUserIDGroupID() on non-LiveView inodeNumber not allowed")
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// the inode is locked so this should never happen (unless the inode
		// was evicted from the cache and it was corrupt when read from disk)
		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
		return err
	}
	if !ok {
		// this should never happen (see above)
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return err
	}

	inode.dirty = true
	inode.UserID = userID
	inode.GroupID = groupID

	updateTime := time.Now()
	inode.AttrChangeTime = updateTime

	err = vS.flushInode(inode)
	if err != nil {
		logger.ErrorWithError(err)
		return err
	}

	return
}

// SetOwnerGroupID sets the inode's owning GroupID and bumps AttrChangeTime,
// persisting the change; only allowed on LiveView inodes.
func (vS *volumeStruct) SetOwnerGroupID(inodeNumber InodeNumber, groupID InodeGroupID) (err error) {
	err = enforceRWMode(false)
	if nil != err {
		return
	}

	snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber))
	if headhunter.SnapShotIDTypeLive != snapShotIDType {
		err = fmt.Errorf("SetOwnerGroupID() on non-LiveView inodeNumber not allowed")
		return
	}

	inode, ok, err := vS.fetchInode(inodeNumber)
	if err != nil {
		// the inode is locked so this should never happen (unless the inode
		// was evicted from the cache and it was corrupt when read from disk)
		logger.ErrorfWithError(err, "%s: fetch of target inode failed", utils.GetFnName())
		return err
	}
	if !ok {
		// this should never happen (see above)
		err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated",
			utils.GetFnName(), inodeNumber, vS.volumeName)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFoundError)
		return err
	}

	inode.dirty = true
	inode.GroupID = groupID

	updateTime := time.Now()
	inode.AttrChangeTime = updateTime

	err = vS.flushInode(inode)
	if
err != nil { 2035 logger.ErrorWithError(err) 2036 return err 2037 } 2038 2039 return 2040 } 2041 2042 func (vS *volumeStruct) GetStream(inodeNumber InodeNumber, inodeStreamName string) (buf []byte, err error) { 2043 snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 2044 if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType { 2045 err = fmt.Errorf("No stream '%v'", inodeStreamName) 2046 return buf, blunder.AddError(err, blunder.StreamNotFound) 2047 } 2048 2049 inode, ok, err := vS.fetchInode(inodeNumber) 2050 if err != nil { 2051 // this indicates disk corruption or software error 2052 // (err includes volume name and inode number) 2053 logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName()) 2054 return nil, err 2055 } 2056 if !ok { 2057 // disk corruption or client request for unallocated inode 2058 err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated", 2059 utils.GetFnName(), inodeNumber, vS.volumeName) 2060 logger.InfoWithError(err) 2061 err = blunder.AddError(err, blunder.NotFoundError) 2062 return nil, err 2063 } 2064 2065 inodeStreamBuf, ok := inode.StreamMap[inodeStreamName] 2066 2067 if !ok { 2068 err = fmt.Errorf("No stream '%v'", inodeStreamName) 2069 return buf, blunder.AddError(err, blunder.StreamNotFound) 2070 } 2071 2072 buf = make([]byte, len(inodeStreamBuf)) 2073 2074 copy(buf, inodeStreamBuf) 2075 2076 err = nil 2077 2078 return 2079 } 2080 2081 func (vS *volumeStruct) PutStream(inodeNumber InodeNumber, inodeStreamName string, buf []byte) (err error) { 2082 err = enforceRWMode(false) 2083 if nil != err { 2084 return 2085 } 2086 2087 snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 2088 if headhunter.SnapShotIDTypeLive != snapShotIDType { 2089 err = fmt.Errorf("PutStream() on non-LiveView inodeNumber not allowed") 2090 return 2091 } 2092 2093 inode, ok, err := vS.fetchInode(inodeNumber) 2094 if err != nil { 2095 
// this indicates disk corruption or software error 2096 // (err includes volume name and inode number) 2097 logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName()) 2098 return err 2099 } 2100 if !ok { 2101 // disk corruption or client request for unallocated inode 2102 err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated", 2103 utils.GetFnName(), inodeNumber, vS.volumeName) 2104 logger.InfoWithError(err) 2105 err = blunder.AddError(err, blunder.NotFoundError) 2106 return err 2107 } 2108 2109 inodeStreamBuf := make([]byte, len(buf)) 2110 2111 copy(inodeStreamBuf, buf) 2112 2113 inode.dirty = true 2114 inode.StreamMap[inodeStreamName] = inodeStreamBuf 2115 2116 updateTime := time.Now() 2117 inode.AttrChangeTime = updateTime 2118 2119 err = vS.flushInode(inode) 2120 if err != nil { 2121 logger.ErrorWithError(err) 2122 return err 2123 } 2124 2125 return 2126 } 2127 2128 func (vS *volumeStruct) DeleteStream(inodeNumber InodeNumber, inodeStreamName string) (err error) { 2129 err = enforceRWMode(false) 2130 if nil != err { 2131 return 2132 } 2133 2134 snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 2135 if headhunter.SnapShotIDTypeLive != snapShotIDType { 2136 err = fmt.Errorf("DeleteStream() on non-LiveView inodeNumber not allowed") 2137 return 2138 } 2139 2140 inode, ok, err := vS.fetchInode(inodeNumber) 2141 if err != nil { 2142 // this indicates disk corruption or software error 2143 // (err includes volume name and inode number) 2144 logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName()) 2145 return 2146 } 2147 if !ok { 2148 // disk corruption or client request for unallocated inode 2149 err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated", 2150 utils.GetFnName(), inodeNumber, vS.volumeName) 2151 logger.InfoWithError(err) 2152 err = blunder.AddError(err, blunder.NotFoundError) 2153 return 2154 } 2155 2156 
inode.dirty = true 2157 delete(inode.StreamMap, inodeStreamName) 2158 2159 updateTime := time.Now() 2160 inode.AttrChangeTime = updateTime 2161 2162 err = vS.flushInode(inode) 2163 if err != nil { 2164 logger.ErrorWithError(err) 2165 return err 2166 } 2167 2168 return 2169 } 2170 2171 func (vS *volumeStruct) FetchLayoutReport(inodeNumber InodeNumber) (layoutReport sortedmap.LayoutReport, err error) { 2172 snapShotIDType, _, _ := vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 2173 if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType { 2174 layoutReport = make(sortedmap.LayoutReport) 2175 err = nil 2176 return 2177 } 2178 2179 inode, ok, err := vS.fetchInode(inodeNumber) 2180 if err != nil { 2181 // this indicates disk corruption or software error 2182 // (err includes volume name and inode number) 2183 logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName()) 2184 return nil, err 2185 } 2186 if !ok { 2187 // disk corruption or client request for unallocated inode 2188 err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated", 2189 utils.GetFnName(), inodeNumber, vS.volumeName) 2190 logger.InfoWithError(err) 2191 err = blunder.AddError(err, blunder.NotFoundError) 2192 return nil, err 2193 } 2194 2195 if SymlinkType == inode.InodeType { 2196 layoutReport = make(sortedmap.LayoutReport) 2197 err = nil 2198 } else { 2199 layoutReport, err = inode.payload.(sortedmap.BPlusTree).FetchLayoutReport() 2200 } 2201 2202 return 2203 } 2204 2205 func (vS *volumeStruct) FetchFragmentationReport(inodeNumber InodeNumber) (fragmentationReport FragmentationReport, err error) { 2206 err = fmt.Errorf("FetchFragmentationReport not yet implemented") 2207 return 2208 } 2209 2210 func (vS *volumeStruct) Optimize(inodeNumber InodeNumber, maxDuration time.Duration) (err error) { 2211 err = enforceRWMode(false) 2212 if nil != err { 2213 return 2214 } 2215 2216 err = fmt.Errorf("Optimize not yet implemented") 2217 return 
2218 } 2219 2220 func validateFileExtents(snapShotID uint64, ourInode *inMemoryInodeStruct) (err error) { 2221 var ( 2222 zero = uint64(0) 2223 ) 2224 2225 readPlan, readPlanBytes, err := ourInode.volume.getReadPlanHelper(snapShotID, ourInode, &zero, nil) 2226 if err != nil { 2227 return err 2228 } 2229 2230 // We read the whole file, so these should match 2231 if readPlanBytes != ourInode.Size { 2232 return blunder.NewError(blunder.CorruptInodeError, "inode %v had recorded size %v bytes, but full read plan was only %v bytes", ourInode.InodeNumber, ourInode.Size, readPlanBytes) 2233 } 2234 2235 // Let's check that the read plan is consistent with what the inode's 2236 // internal log-segment map says about which segments should have how much data. 2237 // 2238 // Make a copy of the inode's LogSegmentMap map so we can decrement the 2239 // byte count for each segment as we walk the readPlan entries. 2240 remainingExpectedBytes := make(map[uint64]uint64) 2241 for segmentNumber, segmentBytesUsed := range ourInode.LogSegmentMap { 2242 remainingExpectedBytes[segmentNumber] += segmentBytesUsed 2243 } 2244 // Then we can compare with the actual read plan we got ... 2245 for _, readPlanStep := range readPlan { 2246 2247 // holes in a sparse file aren't counted 2248 if readPlanStep.LogSegmentNumber == 0 { 2249 continue 2250 } 2251 pathSegments := strings.Split(readPlanStep.ObjectPath, "/") 2252 logSegmentRepresentation := pathSegments[len(pathSegments)-1] 2253 logSegmentNumber, hexConvErr := utils.HexStrToUint64(logSegmentRepresentation) 2254 if hexConvErr != nil { 2255 return blunder.NewError(blunder.CorruptInodeError, 2256 "conversion of read plan object name to log segment number failed; "+ 2257 "readPlanStep: %v logSegmentString: '%v' err: %v", 2258 readPlanStep, logSegmentRepresentation, hexConvErr) 2259 } 2260 remainingExpectedBytes[logSegmentNumber] -= readPlanStep.Length 2261 } 2262 // ... and fail validation if any log segment didn't match. 
We'll put the 2263 // mismatches in a separate map that we'll attach to the error in case a 2264 // consumer or logger wants it. 2265 logSegmentByteCountMismatches := make(map[uint64]uint64) 2266 for logSegmentNumber, remainingExpectedByteCount := range remainingExpectedBytes { 2267 if remainingExpectedByteCount != 0 { 2268 logSegmentByteCountMismatches[logSegmentNumber] = remainingExpectedByteCount 2269 } 2270 } 2271 if len(logSegmentByteCountMismatches) != 0 { 2272 rootErr := fmt.Errorf("inconsistency detected between log segment map and read plan for inode %v", ourInode.InodeNumber) 2273 return merry.WithValue(blunder.AddError(rootErr, blunder.CorruptInodeError), "logSegmentByteCountMismatches", logSegmentByteCountMismatches) 2274 } 2275 2276 // Having verified that our read plan is consistent with our internal log 2277 // segment map, we also want to check that it's consistent with the actual log 2278 // segment objects in Swift. First, we'll construct a map of object paths to 2279 // the largest offset we would need read up to in that object. 2280 objectPathToEndOffset := make(map[string]uint64) 2281 2282 for _, planStep := range readPlan { 2283 2284 // holes in a sparse file don't have objects 2285 if planStep.LogSegmentNumber == 0 { 2286 continue 2287 } 2288 stepEndOffset := planStep.Offset + planStep.Length 2289 endOffset, ok := objectPathToEndOffset[planStep.ObjectPath] 2290 if !ok || stepEndOffset > endOffset { 2291 objectPathToEndOffset[planStep.ObjectPath] = stepEndOffset 2292 } 2293 } 2294 2295 // then, HEAD each object to make sure that it has enough bytes. 
2296 for objectPath, endOffset := range objectPathToEndOffset { 2297 accountName, containerName, objectName, err := utils.PathToAcctContObj(objectPath) 2298 if err != nil { 2299 logger.ErrorWithError(err) 2300 return err 2301 } 2302 2303 contentLength, err := swiftclient.ObjectContentLength(accountName, containerName, objectName) 2304 if err != nil { 2305 logger.ErrorWithError(err) 2306 return err 2307 } 2308 2309 if contentLength < endOffset { 2310 // REVIEW: it might be helpful to continue and make a combined report of all 2311 // insufficiently long log segments, rather than erroring out immediately 2312 err = fmt.Errorf("expected %q to have at least %v bytes, content length was %v", objectPath, endOffset, contentLength) 2313 logger.ErrorWithError(err) 2314 return err 2315 } 2316 2317 } 2318 2319 return nil 2320 } 2321 2322 func (vS *volumeStruct) markCorrupted(inodeNumber InodeNumber) (err error) { 2323 var ( 2324 inodeRec []byte 2325 ok bool 2326 snapShotIDType headhunter.SnapShotIDType 2327 ) 2328 2329 snapShotIDType, _, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 2330 if headhunter.SnapShotIDTypeLive != snapShotIDType { 2331 err = blunder.NewError(blunder.InvalidArgError, "markCorrupted() of non-LiveView inodeNumber not allowed") 2332 return 2333 } 2334 2335 inodeRec, ok, err = vS.headhunterVolumeHandle.GetInodeRec(uint64(inodeNumber)) 2336 if nil == err && ok && (len(globals.corruptionDetectedTrueBuf) <= len(inodeRec)) { 2337 // Just overwrite CorruptionDetected field with true 2338 _ = copy(inodeRec, globals.corruptionDetectedTrueBuf) 2339 } else { 2340 // Use a simple CorruptionDetected == true inodeRec 2341 inodeRec = globals.corruptionDetectedTrueBuf 2342 } 2343 2344 err = vS.headhunterVolumeHandle.PutInodeRec(uint64(inodeNumber), inodeRec) 2345 2346 return 2347 } 2348 2349 func (vS *volumeStruct) Validate(inodeNumber InodeNumber, deeply bool) (err error) { 2350 var ( 2351 ok bool 2352 ourInode *inMemoryInodeStruct 2353 
snapShotID uint64 2354 snapShotIDType headhunter.SnapShotIDType 2355 tree sortedmap.BPlusTree 2356 ) 2357 2358 snapShotIDType, snapShotID, _ = vS.headhunterVolumeHandle.SnapShotU64Decode(uint64(inodeNumber)) 2359 if headhunter.SnapShotIDTypeDotSnapShot == snapShotIDType { 2360 err = nil // Since /<SnapShotDirName> is emulated, always return success 2361 return 2362 } 2363 2364 // we don't want to use the in-memory cache for this; we'll need to fetch 2365 // the current real-world bits from disk. 2366 2367 // If this is a file inode, we flush to ensure that the inode is not dirty 2368 // (and that DLM locking therefore ensures we have exclusive access to the 2369 // inode and don't need to serialize this operation, as there can be no pending 2370 // time-based flush to race with). 2371 2372 err = vS.flushInodeNumber(inodeNumber) 2373 if nil != err { 2374 logger.ErrorfWithError(err, "couldn't flush inode %v", inodeNumber) 2375 err = blunder.AddError(err, blunder.CorruptInodeError) 2376 return 2377 } 2378 2379 err = vS.Purge(inodeNumber) 2380 if nil != err { 2381 logger.ErrorfWithError(err, "couldn't purge inode %v", inodeNumber) 2382 err = blunder.AddError(err, blunder.CorruptInodeError) 2383 return 2384 } 2385 2386 ourInode, ok, err = vS.fetchInode(inodeNumber) 2387 if nil != err { 2388 // this indicates diskj corruption or software error 2389 // (err includes volume name and inode number) 2390 logger.ErrorfWithError(err, "%s: fetch of inode failed", utils.GetFnName()) 2391 err = blunder.AddError(err, blunder.CorruptInodeError) 2392 return 2393 } 2394 if !ok { 2395 // disk corruption or client request for unallocated inode 2396 err = fmt.Errorf("%s: failing request for inode %d volume '%s' because it is unallocated", 2397 utils.GetFnName(), inodeNumber, vS.volumeName) 2398 logger.InfoWithError(err) 2399 err = blunder.AddError(err, blunder.NotFoundError) 2400 return 2401 } 2402 2403 switch ourInode.InodeType { 2404 case DirType, FileType: 2405 tree, ok = 
ourInode.payload.(sortedmap.BPlusTree) 2406 if !ok { 2407 err = fmt.Errorf("type conversion of inode %v payload to sortedmap.BPlusTree failed", ourInode.InodeNumber) 2408 err = blunder.AddError(err, blunder.CorruptInodeError) 2409 _ = vS.markCorrupted(inodeNumber) 2410 return 2411 } 2412 err = tree.Validate() 2413 if nil != err { 2414 err = blunder.AddError(err, blunder.CorruptInodeError) 2415 _ = vS.markCorrupted(inodeNumber) 2416 return 2417 } 2418 if FileType == ourInode.InodeType { 2419 if deeply { 2420 err = validateFileExtents(snapShotID, ourInode) 2421 if nil != err { 2422 err = blunder.AddError(err, blunder.CorruptInodeError) 2423 _ = vS.markCorrupted(inodeNumber) 2424 return 2425 } 2426 } 2427 } 2428 case SymlinkType: 2429 // Nothing to be done here 2430 default: 2431 err = fmt.Errorf("unrecognized inode type") 2432 err = blunder.AddError(err, blunder.CorruptInodeError) 2433 _ = vS.markCorrupted(inodeNumber) 2434 return 2435 } 2436 2437 err = nil 2438 return 2439 }