// Package fs, sitting on top of the inode manager, defines the filesystem exposed by ProxyFS.
package fs

import (
	"bytes"
	"container/list"
	"fmt"
	"math"
	"path"
	"strings"
	"syscall"
	"time"

	"github.com/swiftstack/ProxyFS/blunder"
	"github.com/swiftstack/ProxyFS/dlm"
	"github.com/swiftstack/ProxyFS/inode"
	"github.com/swiftstack/ProxyFS/logger"
	"github.com/swiftstack/ProxyFS/utils"
)

// Shorthand for our internal API debug log id; global to the package.
var internalDebug = logger.DbgInternal

// symlinkFollowState records symlink-resolution progress: which inodes have
// been visited (seen) and how many links have been followed (traversed) —
// presumably used to bound traversal and detect loops; confirm at call sites.
type symlinkFollowState struct {
	seen      map[inode.InodeNumber]bool // inodes already visited during resolution
	traversed int                        // count of symlinks followed so far
}

// dirAndFileName pairs a directory name with a file name so that collections
// of such pairs can be sorted together (see dirAndFileNameSlice).
// Let us sort an array of directory and file names.
type dirAndFileName struct {
	dirName  string
	fileName string
}

// dirAndFileNameSlice implements sort.Interface, ordering by dirName only;
// this has to be a named type to be a method receiver.
type dirAndFileNameSlice []dirAndFileName

// Len returns the number of elements (sort.Interface).
func (coll dirAndFileNameSlice) Len() int {
	return len(coll)
}

// Less orders elements by dirName only; fileName does not participate (sort.Interface).
func (coll dirAndFileNameSlice) Less(i int, j int) bool {
	return coll[i].dirName < coll[j].dirName
}

// Swap exchanges two elements in place (sort.Interface).
func (coll dirAndFileNameSlice) Swap(i int, j int) {
	coll[i], coll[j] = coll[j], coll[i]
}

// trackInFlightFileInodeData is called to ensure a timely Flush occurs.
//
// Only Write() will call this while holding a WriteLock on the fileInode
// either just before or just after its call to inode.Write().
func (vS *volumeStruct) trackInFlightFileInodeData(inodeNumber inode.InodeNumber) {
	var (
		inFlightFileInodeData *inFlightFileInodeDataStruct
		ok                    bool
	)

	// Lock order: globals first, then the volume's dataMutex.
	globals.Lock()
	vS.dataMutex.Lock()
	inFlightFileInodeData, ok = vS.inFlightFileInodeDataMap[inodeNumber]
	if !ok {
		// First un-flushed Write() for this inode: start tracking it and launch
		// a tracker goroutine that will flush it after maxFlushTime unless
		// untrackInFlightFileInodeData() signals it first via the control chan.
		inFlightFileInodeData = &inFlightFileInodeDataStruct{
			InodeNumber: inodeNumber,
			volStruct:   vS,
			control:     make(chan bool, inFlightFileInodeDataControlBuffering),
		}
		vS.inFlightFileInodeDataMap[inodeNumber] = inFlightFileInodeData
		inFlightFileInodeData.globalsListElement = globals.inFlightFileInodeDataList.PushBack(inFlightFileInodeData)
		inFlightFileInodeData.wg.Add(1)
		go inFlightFileInodeData.inFlightFileInodeDataTracker()
	}
	vS.dataMutex.Unlock()
	globals.Unlock()
}

// untrackInFlightFileInodeData is called once it is known a Flush() is no longer needed
// or to actually request a Flush() [as would be the case during unmounting a volume].
func (vS *volumeStruct) untrackInFlightFileInodeData(inodeNumber inode.InodeNumber, flushFirst bool) {
	var (
		inFlightFileInodeData *inFlightFileInodeDataStruct
		ok                    bool
	)

	globals.Lock()
	vS.dataMutex.Lock()
	inFlightFileInodeData, ok = vS.inFlightFileInodeDataMap[inodeNumber]
	if !ok {
		// Not currently tracked... nothing to do.
		vS.dataMutex.Unlock()
		globals.Unlock()
		return
	}
	delete(vS.inFlightFileInodeDataMap, inodeNumber)
	if nil != inFlightFileInodeData.globalsListElement {
		_ = globals.inFlightFileInodeDataList.Remove(inFlightFileInodeData.globalsListElement)
		inFlightFileInodeData.globalsListElement = nil
	}
	// Wake the tracker goroutine; the control chan is buffered (see
	// trackInFlightFileInodeData), so this send should not block while
	// both locks are held.
	inFlightFileInodeData.control <- flushFirst
	vS.dataMutex.Unlock()
	globals.Unlock()
	if flushFirst {
		// Wait (outside the locks) for the tracker goroutine to finish the flush.
		inFlightFileInodeData.wg.Wait()
	}
}

// untrackInFlightFileInodeDataAll is called to flush all current elements
// of vS.inFlightFileInodeDataMap (if any) during SIGHUP or Down().
func (vS *volumeStruct) untrackInFlightFileInodeDataAll() {
	var (
		inFlightFileInodeNumber          inode.InodeNumber
		inFlightFileInodeNumbers         []inode.InodeNumber
		inFlightFileInodeNumbersCapacity int
	)

	// Snapshot list of inode.InodeNumber's currently in vS.inFlightFileInodeDataMap
	// so that the per-inode flushes below run without holding dataMutex.

	vS.dataMutex.Lock()
	inFlightFileInodeNumbersCapacity = len(vS.inFlightFileInodeDataMap)
	if 0 == inFlightFileInodeNumbersCapacity {
		vS.dataMutex.Unlock()
		return
	}
	inFlightFileInodeNumbers = make([]inode.InodeNumber, 0, inFlightFileInodeNumbersCapacity)
	for inFlightFileInodeNumber, _ = range vS.inFlightFileInodeDataMap {
		inFlightFileInodeNumbers = append(inFlightFileInodeNumbers, inFlightFileInodeNumber)
	}
	vS.dataMutex.Unlock()

	// Now go flush each of those

	for _, inFlightFileInodeNumber = range inFlightFileInodeNumbers {
		vS.untrackInFlightFileInodeData(inFlightFileInodeNumber, true)
	}
}

// inFlightFileInodeDataFlusher performs the flush requested (by timeout or by
// untrackInFlightFileInodeData) for a single tracked file inode.
func (vS *volumeStruct) inFlightFileInodeDataFlusher(inodeNumber inode.InodeNumber) {
	var (
		err         error
		inodeLock   *dlm.RWLockStruct
		stillExists bool
	)

	// Act as if a package fs client called Flush()...

	inodeLock, err = vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if nil != err {
		logger.PanicfWithError(err, "InitInodeLock() for volume '%s' inode %v failed", vS.volumeName, inodeNumber)
	}
	err = inodeLock.WriteLock()
	if nil != err {
		logger.PanicfWithError(err, "dlm.Writelock() for volume '%s' inode %v failed", vS.volumeName, inodeNumber)
	}

	// The inode may have been unlinked since tracking began; only flush if it
	// still exists (checked as root so permissions cannot mask existence).
	stillExists = vS.inodeVolumeHandle.Access(inodeNumber, inode.InodeRootUserID, inode.InodeGroupID(0), nil, inode.F_OK,
		inode.NoOverride)
	if stillExists {
		err = vS.inodeVolumeHandle.Flush(inodeNumber, false)
		if nil == err {
			vS.untrackInFlightFileInodeData(inodeNumber, false)
		} else {
			logger.ErrorfWithError(err, "Flush of file data failed on volume '%s' inode %v", vS.volumeName, inodeNumber)
		}
	}

	err = inodeLock.Unlock()
	if nil != err {
		logger.PanicfWithError(err, "dlm.Unlock() for volume '%s' inode %v failed", vS.volumeName, inodeNumber)
	}
}

// inFlightFileInodeDataTracker is the per-inode goroutine started by
// trackInFlightFileInodeData. It waits for either an explicit request on the
// control chan or the volume's maxFlushTime timeout, then flushes if asked.
func (inFlightFileInodeData *inFlightFileInodeDataStruct) inFlightFileInodeDataTracker() {
	var (
		flushFirst bool
	)

	logger.Tracef("fs.inFlightFileInodeDataTracker(): waiting to flush volume '%s' inode %v",
		inFlightFileInodeData.volStruct.volumeName, inFlightFileInodeData.InodeNumber)

	select {
	case flushFirst = <-inFlightFileInodeData.control:
		// All we needed was the value of flushFirst from control chan
	case <-time.After(inFlightFileInodeData.volStruct.maxFlushTime):
		// Timed out... force a flush.
		flushFirst = true
	}

	logger.Tracef("fs.inFlightFileInodeDataTracker(): flush starting for volume '%s' inode %v flushfirst %t",
		inFlightFileInodeData.volStruct.volumeName, inFlightFileInodeData.InodeNumber, flushFirst)

	if flushFirst {
		inFlightFileInodeData.volStruct.inFlightFileInodeDataFlusher(inFlightFileInodeData.InodeNumber)
	}

	// Signal any untrackInFlightFileInodeData(..., true) caller waiting on us.
	inFlightFileInodeData.wg.Done()
}

// fetchVolumeHandleByAccountName maps a Swift account name to its VolumeHandle.
// NOTE(review): the error messages below still say "mountByAccountName()" —
// presumably a stale name from before a rename; confirm before changing.
func fetchVolumeHandleByAccountName(accountName string) (volumeHandle VolumeHandle, err error) {
	var (
		ok         bool
		vS         *volumeStruct
		volumeName string
	)

	startTime := time.Now()
	defer func() {
		globals.FetchVolumeHandleUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.FetchVolumeHandleErrors.Add(1)
		}
	}()

	globals.Lock()

	volumeName, ok = inode.AccountNameToVolumeName(accountName)
	if !ok {
		err = fmt.Errorf("Unknown accountName passed to mountByAccountName(): \"%s\"", accountName)
		err = blunder.AddError(err, blunder.NotFoundError)
		globals.Unlock()
		return
	}

	vS, ok = globals.volumeMap[volumeName]
	if !ok {
		err = fmt.Errorf("Unknown volumeName computed by mountByAccountName(): \"%s\"", volumeName)
		err = blunder.AddError(err, blunder.NotFoundError)
		globals.Unlock()
		return
	}

	globals.Unlock()

	volumeHandle = vS
	err = nil

	return
}

// fetchVolumeHandleByVolumeName maps a volume name to its VolumeHandle.
func fetchVolumeHandleByVolumeName(volumeName string) (volumeHandle VolumeHandle, err error) {
	var (
		ok bool
		vS *volumeStruct
	)

	startTime := time.Now()
	defer func() {
		globals.FetchVolumeHandleUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.FetchVolumeHandleErrors.Add(1)
		}
	}()

	globals.Lock()

	vS, ok = globals.volumeMap[volumeName]
	if !ok {
		err = fmt.Errorf("Unknown volumeName passed to mountByVolumeName(): \"%s\"", volumeName)
		err = blunder.AddError(err, blunder.NotFoundError)
		globals.Unlock()
		return
	}

	globals.Unlock()

	volumeHandle = vS
	err = nil

	return
}

// Access checks whether the given identity may access inodeNumber with
// accessMode, delegating to the inode layer's Access().
func (vS *volumeStruct) Access(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, accessMode inode.InodeMode) (accessReturn bool) {
	startTime := time.Now()
	defer func() {
		globals.AccessUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	accessReturn = vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, accessMode,
		inode.NoOverride)
	return
}

// CallInodeToProvisionObject asks the inode layer to provision a new backing
// object, returning its physical path.
func (vS *volumeStruct) CallInodeToProvisionObject() (pPath string, err error) {
	startTime := time.Now()
	defer func() {
		globals.CallInodeToProvisionObjectUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.CallInodeToProvisionObjectErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	pPath, err = vS.inodeVolumeHandle.ProvisionObject()
	return
}

// Create makes a new file named basename in dirInodeNumber with mode filePerm,
// returning the new file's inode number. Requires W|X access on the directory.
func (vS *volumeStruct) Create(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, dirInodeNumber inode.InodeNumber, basename string, filePerm inode.InodeMode) (fileInodeNumber inode.InodeNumber, err error) {
	startTime := time.Now()
	defer func() {
		globals.CreateUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.CreateErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	err = validateBaseName(basename)
	if err != nil {
		return 0, err
	}

	// Lock the directory inode before doing the link
	dirInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(dirInodeNumber, nil)
	if err != nil {
		return 0, err
	}
	err = dirInodeLock.WriteLock()
	if err != nil {
		return 0, err
	}
	defer dirInodeLock.Unlock()

	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		return 0, blunder.NewError(blunder.NotFoundError, "ENOENT")
	}
	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
		inode.NoOverride) {
		return 0, blunder.NewError(blunder.PermDeniedError, "EACCES")
	}

	// create the file and add it to the directory
	fileInodeNumber, err = vS.inodeVolumeHandle.CreateFile(filePerm, userID, groupID)
	if err != nil {
		return 0, err
	}

	err = vS.inodeVolumeHandle.Link(dirInodeNumber, basename, fileInodeNumber, false)
	if err != nil {
		// Roll back the orphaned file inode so it is not leaked.
		destroyErr := vS.inodeVolumeHandle.Destroy(fileInodeNumber)
		if destroyErr != nil {
			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Link() in fs.Create", fileInodeNumber)
		}
		return 0, err
	}

	return fileInodeNumber, nil
}

// DefragmentFile defragments fileInodeNumber one chunk at a time, dropping and
// re-acquiring the inode WriteLock (and jobRWMutex) between chunks so other
// callers are not starved; sleeps fileDefragmentChunkDelay between chunks.
func (vS *volumeStruct) DefragmentFile(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, fileInodeNumber inode.InodeNumber) (err error) {
	var (
		eofReached bool
		fileOffset uint64
		inodeLock  *dlm.RWLockStruct
		inodeType  inode.InodeType
	)

	startTime := time.Now()
	defer func() {
		globals.DefragmentFileUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.DefragmentFileErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()

	inodeLock, err = vS.inodeVolumeHandle.InitInodeLock(fileInodeNumber, nil)
	if nil != err {
		vS.jobRWMutex.RUnlock()
		return
	}
	err = inodeLock.WriteLock()
	if nil != err {
		vS.jobRWMutex.RUnlock()
		return
	}

	if !vS.inodeVolumeHandle.Access(fileInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		_ = inodeLock.Unlock()
		vS.jobRWMutex.RUnlock()
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(fileInodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
		inode.OwnerOverride) {
		_ = inodeLock.Unlock()
		vS.jobRWMutex.RUnlock()
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	inodeType, err = vS.inodeVolumeHandle.GetType(fileInodeNumber)
	if nil != err {
		_ = inodeLock.Unlock()
		vS.jobRWMutex.RUnlock()
		logger.ErrorfWithError(err, "couldn't get type for inode %v", fileInodeNumber)
		return
	}
	// Make sure the inode number is for a file inode
	if inodeType != inode.FileType {
		_ = inodeLock.Unlock()
		vS.jobRWMutex.RUnlock()
		err = fmt.Errorf("%s: expected inode %v to be a file inode, got %v", utils.GetFnName(), fileInodeNumber, inodeType)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFileError)
		return
	}

	fileOffset = 0

	for {
		// Defragment one chunk, then release all locks before deciding whether
		// to continue (errors and EOF both exit with locks already released).
		fileOffset, eofReached, err = vS.inodeVolumeHandle.DefragmentFile(fileInodeNumber, fileOffset, vS.fileDefragmentChunkSize)
		_ = inodeLock.Unlock()
		vS.jobRWMutex.RUnlock()
		if nil != err {
			return
		}
		if eofReached {
			return
		}
		time.Sleep(vS.fileDefragmentChunkDelay)
		vS.jobRWMutex.RLock()
		err = inodeLock.WriteLock()
		if nil != err {
			vS.jobRWMutex.RUnlock()
			return
		}
	}
}

// FetchExtentMapChunk returns a chunk of the file's extent map around
// fileOffset, bounded by the max entry counts before/after the offset.
func (vS *volumeStruct) FetchExtentMapChunk(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, fileInodeNumber inode.InodeNumber, fileOffset uint64, maxEntriesFromFileOffset int64, maxEntriesBeforeFileOffset int64) (extentMapChunk *inode.ExtentMapChunkStruct, err error) {
	var (
		inodeLock *dlm.RWLockStruct
		inodeType inode.InodeType
	)

	startTime := time.Now()
	defer func() {
		globals.FetchExtentMapChunkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.FetchExtentMapChunkErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err = vS.inodeVolumeHandle.InitInodeLock(fileInodeNumber, nil)
	if nil != err {
		return
	}
	err = inodeLock.ReadLock()
	if nil != err {
		return
	}
	defer inodeLock.Unlock()

	if !vS.inodeVolumeHandle.Access(fileInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(fileInodeNumber, userID, groupID, otherGroupIDs, inode.R_OK,
		inode.OwnerOverride) {
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	inodeType, err = vS.inodeVolumeHandle.GetType(fileInodeNumber)
	if nil != err {
		logger.ErrorfWithError(err, "couldn't get type for inode %v", fileInodeNumber)
		return
	}
	// Make sure the inode number is for a file inode
	if inodeType != inode.FileType {
		err = fmt.Errorf("%s: expected inode %v to be a file inode, got %v", utils.GetFnName(), fileInodeNumber, inodeType)
		logger.ErrorWithError(err)
		err = blunder.AddError(err, blunder.NotFileError)
		return
	}

	extentMapChunk, err = vS.inodeVolumeHandle.FetchExtentMapChunk(fileInodeNumber, fileOffset, maxEntriesFromFileOffset, maxEntriesBeforeFileOffset)

	return
}

// doInlineCheckpointIfEnabled is called whenever we must guarantee that reported state changes
// are, indeed, persisted. Absent any sort of persistent transaction log, this means performing
// a checkpoint unfortunately.
//
// Currently, only explicitly invoked Flushes trigger this. But, actually, any Swift/S3 API call
// that modifies Objects or (what the client thinks are) Containers should also.
//
// TODO is to determine where else a call to this func should also be made.
502 // 503 func (vS *volumeStruct) doInlineCheckpointIfEnabled() { 504 var ( 505 err error 506 ) 507 508 if !vS.doCheckpointPerFlush { 509 return 510 } 511 512 err = vS.headhunterVolumeHandle.DoCheckpoint() 513 if nil != err { 514 logger.Fatalf("fs.doInlineCheckpoint() call to headhunter.DoCheckpoint() failed: %v", err) 515 } 516 } 517 518 func (vS *volumeStruct) Flush(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (err error) { 519 startTime := time.Now() 520 defer func() { 521 globals.FlushUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 522 if err != nil { 523 globals.FlushErrors.Add(1) 524 } 525 }() 526 527 vS.jobRWMutex.RLock() 528 defer vS.jobRWMutex.RUnlock() 529 530 inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil) 531 if err != nil { 532 return 533 } 534 err = inodeLock.WriteLock() 535 if err != nil { 536 return 537 } 538 defer inodeLock.Unlock() 539 540 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, 541 inode.NoOverride) { 542 return blunder.NewError(blunder.NotFoundError, "ENOENT") 543 } 544 545 // Note: We'd normally check EACCES here...but there are paths in FUSE (e.g. when files are 546 // closed) that end up calling Flush()...even though the file was "opened" ReadOnly. 547 // This is presumably to support updated of ATime and such. In any event, an EACCESS 548 // check would fail if the caller actually only had ReadOnly access to the Inode, so 549 // we won't be doing the check here. 
550 551 err = vS.inodeVolumeHandle.Flush(inodeNumber, false) 552 vS.untrackInFlightFileInodeData(inodeNumber, false) 553 554 vS.doInlineCheckpointIfEnabled() 555 556 return 557 } 558 559 func (vS *volumeStruct) getFileLockList(inodeNumber inode.InodeNumber) (flockList *list.List) { 560 vS.dataMutex.Lock() 561 defer vS.dataMutex.Unlock() 562 563 flockList, ok := vS.FLockMap[inodeNumber] 564 if !ok { 565 flockList = new(list.List) 566 vS.FLockMap[inodeNumber] = flockList 567 } 568 569 return 570 } 571 572 // Check for lock conflict with other Pids, if there is a conflict then it will return the first occurance of conflicting range. 573 func checkConflict(elm *FlockStruct, flock *FlockStruct) bool { 574 575 if flock.Pid == elm.Pid { 576 return false 577 } 578 579 if (elm.Start+elm.Len) <= flock.Start || (flock.Start+flock.Len) <= elm.Start { 580 return false 581 } 582 583 if (flock.Type == syscall.F_WRLCK) || (elm.Type == syscall.F_WRLCK) { 584 return true 585 } 586 587 return false 588 } 589 590 func (vS *volumeStruct) verifyLock(inodeNumber inode.InodeNumber, flock *FlockStruct) (conflictLock *FlockStruct) { 591 flockList := vS.getFileLockList(inodeNumber) 592 593 for e := flockList.Front(); e != nil; e = e.Next() { 594 elm := e.Value.(*FlockStruct) 595 596 if checkConflict(elm, flock) == true { 597 return elm 598 } 599 } 600 601 return nil 602 } 603 604 // Insert a file lock range to corresponding lock list for the pid. 605 // Assumption: There is no lock conflict and the range that is being inserted has no conflict and is free. 606 func (vS *volumeStruct) fileLockInsert(inodeNumber inode.InodeNumber, inFlock *FlockStruct) (err error) { 607 err = nil 608 flockList := vS.getFileLockList(inodeNumber) 609 610 overlapList := new(list.List) 611 var beforeElm *list.Element // Refers to the immediate element that starts before the start of the range. 612 var afterElm *list.Element // Refers to the immediate element that starts after the end of the range. 
613 614 // flockList is sorted by starting offset of the range. 615 // Inserting a range happens in two steps. 1) Check if there is any conflict and also identify the 616 // point in the list where the entry will be added (before and after elements) 2) Then check if 617 // the range can extend the before element, if so adjust it. 3) Simillarly, check if the after 618 // element can be collapsed if it forms a contiguous range. 619 620 for e := flockList.Front(); e != nil; e = e.Next() { 621 elm := e.Value.(*FlockStruct) 622 623 if (elm.Start + elm.Len) <= inFlock.Start { 624 beforeElm = e 625 continue 626 } 627 628 if elm.Start > (inFlock.Start + inFlock.Len) { 629 afterElm = e 630 if overlapList.Len() == 0 { 631 flockList.InsertBefore(inFlock, e) 632 return 633 } 634 635 break 636 } 637 638 if checkConflict(elm, inFlock) { 639 err = blunder.AddError(nil, blunder.TryAgainError) 640 return 641 } 642 643 if elm.Pid == inFlock.Pid { 644 overlapList.PushBack(e) 645 } 646 } 647 648 if overlapList.Len() == 0 { 649 if beforeElm != nil { 650 elm := beforeElm.Value.(*FlockStruct) 651 if elm.Pid == inFlock.Pid && elm.Type == inFlock.Type && (elm.Start+elm.Len) == inFlock.Start { 652 elm.Len = inFlock.Start + inFlock.Len - elm.Len 653 } else { 654 flockList.InsertAfter(inFlock, beforeElm) 655 } 656 } else { 657 flockList.PushBack(inFlock) 658 } 659 660 return 661 } 662 663 // Look at the last element in the overlapping list 664 lastEnt := overlapList.Back() 665 e := lastEnt.Value.(*list.Element) 666 elm := e.Value.(*FlockStruct) 667 if (elm.Start + elm.Len) > (inFlock.Start + inFlock.Len) { 668 inFlock.Len = (elm.Start + elm.Len) - inFlock.Start 669 } 670 671 // We can delete all the entries in the overlapping list. These entries are replaced by 672 // the range we are inserting. 
673 for e := overlapList.Front(); e != nil; e = e.Next() { 674 entry := e.Value.(*list.Element) 675 flockList.Remove(entry) 676 } 677 678 // Now adjust the before and after entries: 679 // First adjust the after: 680 if afterElm != nil { 681 elm := afterElm.Value.(*FlockStruct) 682 if elm.Pid == inFlock.Pid && elm.Type == inFlock.Type && (inFlock.Start+inFlock.Len) == elm.Start { 683 // We can collapse the entry: 684 elm.Len = elm.Start + elm.Len - inFlock.Start 685 elm.Start = inFlock.Start 686 687 if beforeElm != nil { 688 belm := beforeElm.Value.(*FlockStruct) 689 if belm.Pid == elm.Pid && belm.Type == elm.Type && (belm.Start+belm.Len) == elm.Start { 690 belm.Len = elm.Start + elm.Len - belm.Start 691 flockList.Remove(afterElm) 692 } 693 } 694 695 return 696 } 697 } 698 699 if beforeElm != nil { 700 belm := beforeElm.Value.(*FlockStruct) 701 if belm.Pid == inFlock.Pid && belm.Type == inFlock.Type && (belm.Start+belm.Len) == inFlock.Start { 702 belm.Len = inFlock.Start + inFlock.Len - belm.Start 703 } 704 705 flockList.InsertAfter(inFlock, beforeElm) 706 return 707 } 708 709 if afterElm != nil { 710 flockList.InsertBefore(inFlock, afterElm) 711 } else { 712 flockList.PushBack(inFlock) 713 } 714 715 return 716 717 } 718 719 // Unlock a given range. All locks held in this range by the process (identified by Pid) are removed. 
720 func (vS *volumeStruct) fileUnlock(inodeNumber inode.InodeNumber, inFlock *FlockStruct) (err error) { 721 722 flockList := vS.getFileLockList(inodeNumber) 723 if flockList == nil { 724 logger.Warnf("Unlock of a region not already locked - %+v", inFlock) 725 return 726 } 727 728 start := inFlock.Start 729 len := inFlock.Len 730 731 removeList := new(list.List) 732 733 for e := flockList.Front(); e != nil; e = e.Next() { 734 elm := e.Value.(*FlockStruct) 735 736 if elm.Pid != inFlock.Pid { 737 continue 738 } 739 740 if (elm.Start + elm.Len) < start { 741 continue 742 } 743 744 if elm.Start >= (start + len) { 745 break 746 } 747 748 // If the lock falls completely in the range, delete it. 749 if elm.Start >= start && (elm.Start+elm.Len) <= (start+len) { 750 removeList.PushBack(e) 751 continue 752 } 753 754 // This lock overlapps with the range - three possibalities 1) lock starts before the range, 2) end after range and 3) both. 755 756 elmLen := elm.Start + elm.Len // Save the original length, it is required in case of #3 (both) 757 758 if elm.Start < start { // Handle the first part - lock starts before the range. 759 elm.Len = start - elm.Start 760 } 761 762 if elmLen > (start + len) { // Lock extends beyond the unlock range. 763 if elm.Start > start { // case #2 764 // use the existing record 765 elm.Start = start + len 766 elm.Len = elmLen - elm.Start 767 break 768 } 769 770 // Create a new record - handle case #3 both (starts before the range and extends beyond the range) 771 elmTail := new(FlockStruct) 772 elmTail.Start = start + len 773 elmTail.Len = elmLen - elm.Start 774 elmTail.Pid = elm.Pid 775 elmTail.Type = elm.Type 776 elmTail.Whence = elm.Whence 777 flockList.InsertAfter(elmTail, e) 778 break 779 } 780 } 781 782 for e := removeList.Front(); e != nil; e = e.Next() { 783 elm := e.Value.(*list.Element) 784 flockList.Remove(elm) 785 } 786 787 return 788 } 789 790 // Implements file locking conforming to fcntl(2) locking description. 
F_SETLKW is not implemented. Supports F_SETLW and F_GETLW. 791 // whence: FS supports only SEEK_SET - starting from 0, since it does not manage file handles, caller is expected to supply the start and length relative to offset ZERO. 792 func (vS *volumeStruct) Flock(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, lockCmd int32, inFlock *FlockStruct) (outFlock *FlockStruct, err error) { 793 startTime := time.Now() 794 defer func() { 795 switch lockCmd { 796 797 case syscall.F_GETLK: 798 globals.FlockGetUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 799 if err != nil { 800 globals.FlockGetErrors.Add(1) 801 } 802 803 case syscall.F_SETLK: 804 if inFlock.Type == syscall.F_UNLCK { 805 globals.FlockUnlockUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 806 if err != nil { 807 globals.FlockUnlockErrors.Add(1) 808 } 809 810 } else if inFlock.Type == syscall.F_WRLCK || inFlock.Type == syscall.F_RDLCK { 811 globals.FlockLockUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 812 if err != nil { 813 globals.FlockLockErrors.Add(1) 814 } 815 } else { 816 globals.FlockOtherErrors.Add(1) 817 } 818 819 default: 820 globals.FlockOtherErrors.Add(1) 821 } 822 823 }() 824 825 vS.jobRWMutex.RLock() 826 defer vS.jobRWMutex.RUnlock() 827 828 outFlock = inFlock 829 830 if lockCmd == syscall.F_SETLKW { 831 err = blunder.AddError(nil, blunder.NotSupportedError) 832 return 833 } 834 835 // Make sure the inode does not go away, while we are applying the flock. 
836 inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil) 837 if err != nil { 838 return 839 } 840 err = inodeLock.ReadLock() 841 if err != nil { 842 return 843 } 844 defer inodeLock.Unlock() 845 846 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, inode.NoOverride) { 847 err = blunder.NewError(blunder.NotFoundError, "ENOENT") 848 return 849 } 850 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK, inode.OwnerOverride) { 851 err = blunder.NewError(blunder.PermDeniedError, "EACCES") 852 return 853 } 854 855 if inFlock.Len == 0 { // If length is ZERO means treat it as whole file. 856 inFlock.Len = ^uint64(0) 857 } 858 859 switch lockCmd { 860 case syscall.F_GETLK: 861 conflictLock := vS.verifyLock(inodeNumber, inFlock) 862 if conflictLock != nil { 863 outFlock = conflictLock 864 err = blunder.AddError(nil, blunder.TryAgainError) 865 } else { 866 outFlock = inFlock 867 outFlock.Type = syscall.F_UNLCK 868 } 869 break 870 871 case syscall.F_SETLK: 872 if inFlock.Type == syscall.F_UNLCK { 873 err = vS.fileUnlock(inodeNumber, inFlock) 874 875 } else if inFlock.Type == syscall.F_WRLCK || inFlock.Type == syscall.F_RDLCK { 876 err = vS.fileLockInsert(inodeNumber, inFlock) 877 878 } else { 879 err = blunder.NewError(blunder.InvalidArgError, "EINVAL") 880 return 881 } 882 break 883 884 default: 885 err = blunder.NewError(blunder.InvalidArgError, "EINVAL") 886 return 887 } 888 889 return 890 } 891 892 func (vS *volumeStruct) getstatHelper(inodeNumber inode.InodeNumber, callerID dlm.CallerID) (stat Stat, err error) { 893 894 lockID, err := vS.inodeVolumeHandle.MakeLockID(inodeNumber) 895 if err != nil { 896 return 897 } 898 if !dlm.IsLockHeld(lockID, callerID, dlm.ANYLOCK) { 899 err = fmt.Errorf("%s: inode %v lock must be held before calling", utils.GetFnName(), inodeNumber) 900 return nil, blunder.AddError(err, blunder.NotFoundError) 901 } 902 903 stat, err = 
vS.getstatHelperWhileLocked(inodeNumber) 904 905 return 906 } 907 908 func (vS *volumeStruct) getstatHelperWhileLocked(inodeNumber inode.InodeNumber) (stat Stat, err error) { 909 var ( 910 metadata *inode.MetadataStruct 911 ) 912 913 metadata, err = vS.inodeVolumeHandle.GetMetadata(inodeNumber) 914 if nil != err { 915 return 916 } 917 918 stat = make(map[StatKey]uint64) 919 920 stat[StatCRTime] = uint64(metadata.CreationTime.UnixNano()) 921 stat[StatMTime] = uint64(metadata.ModificationTime.UnixNano()) 922 stat[StatCTime] = uint64(metadata.AttrChangeTime.UnixNano()) 923 stat[StatATime] = uint64(metadata.AccessTime.UnixNano()) 924 stat[StatSize] = metadata.Size 925 stat[StatNLink] = metadata.LinkCount 926 stat[StatFType] = uint64(metadata.InodeType) 927 stat[StatINum] = uint64(inodeNumber) 928 stat[StatMode] = uint64(metadata.Mode) 929 stat[StatUserID] = uint64(metadata.UserID) 930 stat[StatGroupID] = uint64(metadata.GroupID) 931 stat[StatNumWrites] = metadata.NumWrites 932 933 return 934 } 935 936 func (vS *volumeStruct) Getstat(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (stat Stat, err error) { 937 startTime := time.Now() 938 defer func() { 939 globals.GetstatUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 940 if err != nil { 941 globals.GetstatErrors.Add(1) 942 } 943 }() 944 945 vS.jobRWMutex.RLock() 946 defer vS.jobRWMutex.RUnlock() 947 948 inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil) 949 if err != nil { 950 return 951 } 952 err = inodeLock.ReadLock() 953 if err != nil { 954 return 955 } 956 defer inodeLock.Unlock() 957 958 // Call getstat helper function to do the work 959 return vS.getstatHelper(inodeNumber, inodeLock.GetCallerID()) 960 } 961 962 func (vS *volumeStruct) getTypeHelper(inodeNumber inode.InodeNumber, callerID dlm.CallerID) (inodeType inode.InodeType, err error) { 963 964 lockID, err := vS.inodeVolumeHandle.MakeLockID(inodeNumber) 965 
if err != nil { 966 return 967 } 968 if !dlm.IsLockHeld(lockID, callerID, dlm.ANYLOCK) { 969 err = fmt.Errorf("%s: inode %v lock must be held before calling.", utils.GetFnName(), inodeNumber) 970 err = blunder.AddError(err, blunder.NotFoundError) 971 return 972 } 973 974 inodeType, err = vS.inodeVolumeHandle.GetType(inodeNumber) 975 if err != nil { 976 logger.ErrorWithError(err, "couldn't get inode type") 977 return inodeType, err 978 } 979 return inodeType, nil 980 } 981 982 func (vS *volumeStruct) GetType(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeType inode.InodeType, err error) { 983 startTime := time.Now() 984 defer func() { 985 globals.GetTypeUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 986 if err != nil { 987 globals.GetTypeErrors.Add(1) 988 } 989 }() 990 991 vS.jobRWMutex.RLock() 992 defer vS.jobRWMutex.RUnlock() 993 994 inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil) 995 if err != nil { 996 return 997 } 998 err = inodeLock.ReadLock() 999 if err != nil { 1000 return 1001 } 1002 defer inodeLock.Unlock() 1003 1004 return vS.getTypeHelper(inodeNumber, inodeLock.GetCallerID()) 1005 } 1006 1007 func (vS *volumeStruct) GetXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, streamName string) (value []byte, err error) { 1008 startTime := time.Now() 1009 defer func() { 1010 globals.GetXAttrUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 1011 if err != nil { 1012 globals.GetXAttrErrors.Add(1) 1013 } 1014 }() 1015 1016 vS.jobRWMutex.RLock() 1017 defer vS.jobRWMutex.RUnlock() 1018 1019 inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil) 1020 if err != nil { 1021 return 1022 } 1023 err = inodeLock.ReadLock() 1024 if err != nil { 1025 return 1026 } 1027 defer inodeLock.Unlock() 1028 1029 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, 
		otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK,
		inode.OwnerOverride) {
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	value, err = vS.inodeVolumeHandle.GetStream(inodeNumber, streamName)
	if err != nil {
		// Did not find the requested stream. However this isn't really an error since
		// samba will ask for acl-related streams and is fine with not finding them.
		logger.TracefWithError(err, "Failed to get XAttr %v of inode %v", streamName, inodeNumber)
	}

	return
}

// IsDir returns true if the given inode is a directory.
//
// NOTE(review): userID/groupID/otherGroupIDs are accepted but unused — no
// Access() check is performed by IsDir/IsFile/IsSymlink; confirm intentional.
func (vS *volumeStruct) IsDir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeIsDir bool, err error) {
	// Record per-call latency and error count for stats.
	startTime := time.Now()
	defer func() {
		globals.IsDirUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.IsDirErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.ReadLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	// Sanity-check that the DLM lock we just took really is held.
	// NOTE(review): IsFile/IsSymlink below skip this check — inconsistent.
	lockID, err := vS.inodeVolumeHandle.MakeLockID(inodeNumber)
	if err != nil {
		return
	}
	if !dlm.IsLockHeld(lockID, inodeLock.GetCallerID(), dlm.ANYLOCK) {
		err = fmt.Errorf("%s: inode %v lock must be held before calling", utils.GetFnName(), inodeNumber)
		return false, blunder.AddError(err, blunder.NotFoundError)
	}

	inodeType, err := vS.inodeVolumeHandle.GetType(inodeNumber)
	if err != nil {
		return false, err
	}
	return inodeType == inode.DirType, nil
}

// IsFile returns true if the given inode is a regular file.
func (vS *volumeStruct) IsFile(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeIsFile bool, err error) {
	startTime := time.Now()
	defer func() {
		globals.IsFileUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.IsFileErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.ReadLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	inodeType, err := vS.inodeVolumeHandle.GetType(inodeNumber)
	if err != nil {
		return false, err
	}

	return inodeType == inode.FileType, nil
}

// IsSymlink returns true if the given inode is a symbolic link.
func (vS *volumeStruct) IsSymlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (inodeIsSymlink bool, err error) {
	startTime := time.Now()
	defer func() {
		globals.IsSymlinkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.IsSymlinkErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.ReadLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	inodeType, err := vS.inodeVolumeHandle.GetType(inodeNumber)
	if err != nil {
		return false, err
	}

	return inodeType == inode.SymlinkType, nil
}

// Link creates directory entry basename in dirInodeNumber referring to
// targetInodeNumber (a hard link). The target must not be a directory.
func (vS *volumeStruct) Link(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, dirInodeNumber inode.InodeNumber, basename string, targetInodeNumber inode.InodeNumber) (err error) {
	startTime := time.Now()
	defer func() {
		globals.LinkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.LinkErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	var (
		inodeType inode.InodeType
	)

	err = validateBaseName(basename)
	if err != nil {
		return
	}

	// We need both the dirInodeLock and the targetInode lock to make sure they
	// don't go away and linkCount is updated correctly.
	callerID := dlm.GenerateCallerID()
	dirInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(dirInodeNumber, callerID)
	if err != nil {
		return
	}

	targetInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(targetInodeNumber, callerID)
	if err != nil {
		return
	}

	// Lock the target inode to check its type and ensure it's not a directory (if it is a
	// directory then locking it after the target directory could result in deadlock).
	err = targetInodeLock.WriteLock()
	if err != nil {
		return
	}

	// make sure target inode is not a directory
	inodeType, err = vS.inodeVolumeHandle.GetType(targetInodeNumber)
	if err != nil {
		targetInodeLock.Unlock()
		// Because we know that GetType() has already "blunderized" the error, we just pass it on
		logger.ErrorfWithError(err, "%s: couldn't get type for inode %v", utils.GetFnName(), targetInodeNumber)
		return err
	}
	if inodeType == inode.DirType {
		targetInodeLock.Unlock()
		// no need to print an error when its a mistake by the client
		err = fmt.Errorf("%s: inode %v cannot be a dir inode", utils.GetFnName(), targetInodeNumber)
		return blunder.AddError(err, blunder.LinkDirError)
	}

	// drop the target inode lock so we can get the directory lock then
	// reget the target inode lock (dir-before-target is the safe lock order)
	targetInodeLock.Unlock()

	err = dirInodeLock.WriteLock()
	if err != nil {
		return
	}
	defer dirInodeLock.Unlock()

	err = targetInodeLock.WriteLock()
	if err != nil {
		return
	}
	defer targetInodeLock.Unlock()

	// Both dir and target must exist (F_OK) and the dir must be writable/searchable.
	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(targetInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	err = vS.inodeVolumeHandle.Link(dirInodeNumber, basename, targetInodeNumber, false)

	// if the link was successful and this is a regular file then any
	// pending data was flushed
	if err == nil && inodeType == inode.FileType {
		vS.untrackInFlightFileInodeData(targetInodeNumber, false)
	}

	return err
}

// ListXAttr returns the names of all extended-attribute streams on the inode.
func (vS *volumeStruct) ListXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (streamNames []string, err error) {
	startTime := time.Now()
	defer func() {
		globals.ListXAttrUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.ListXAttrErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.ReadLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.NotFoundError,
"ENOENT") 1272 return 1273 } 1274 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK, 1275 inode.OwnerOverride) { 1276 err = blunder.NewError(blunder.PermDeniedError, "EACCES") 1277 return 1278 } 1279 1280 metadata, err := vS.inodeVolumeHandle.GetMetadata(inodeNumber) 1281 if err != nil { 1282 // Did not find the requested stream. However this isn't really an error since 1283 // samba will ask for acl-related streams and is fine with not finding them. 1284 logger.TracefWithError(err, "Failed to list XAttrs of inode %v", inodeNumber) 1285 return 1286 } 1287 1288 streamNames = make([]string, len(metadata.InodeStreamNameSlice)) 1289 copy(streamNames, metadata.InodeStreamNameSlice) 1290 return 1291 } 1292 1293 func (vS *volumeStruct) Lookup(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, dirInodeNumber inode.InodeNumber, basename string) (inodeNumber inode.InodeNumber, err error) { 1294 startTime := time.Now() 1295 defer func() { 1296 globals.LookupUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 1297 if err != nil { 1298 globals.LookupErrors.Add(1) 1299 } 1300 }() 1301 1302 vS.jobRWMutex.RLock() 1303 defer vS.jobRWMutex.RUnlock() 1304 1305 dirInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(dirInodeNumber, nil) 1306 if err != nil { 1307 return 1308 } 1309 dirInodeLock.ReadLock() 1310 defer dirInodeLock.Unlock() 1311 1312 if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, 1313 inode.NoOverride) { 1314 err = blunder.NewError(blunder.NotFoundError, "ENOENT") 1315 return 1316 } 1317 if !vS.inodeVolumeHandle.Access(dirInodeNumber, userID, groupID, otherGroupIDs, inode.X_OK, 1318 inode.NoOverride) { 1319 err = blunder.NewError(blunder.PermDeniedError, "EACCES") 1320 return 1321 } 1322 1323 inodeNumber, err = vS.inodeVolumeHandle.Lookup(dirInodeNumber, basename) 1324 return inodeNumber, err 1325 } 1326 1327 func (vS *volumeStruct) 
LookupPath(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, fullpath string) (inodeNumber inode.InodeNumber, err error) {
	// Record per-call latency and error count for stats.
	startTime := time.Now()
	defer func() {
		globals.LookupPathUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.LookupPathErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	// A fullpath starting with "/" would survive the path-segment splitting
	// below as a first segment that still begins with "/", which the
	// underlying Lookup cannot resolve — so strip any leading "/" first.
	// (Replaces the previous TrimPrefix + strings.Compare round-trip, which
	// was an identity: assigning the trimmed value unconditionally is the same.)
	fullpath = strings.TrimPrefix(fullpath, "/")

	pathSegments := strings.Split(path.Clean(fullpath), "/")

	// Walk from the root directory, one segment at a time.
	cursorInodeNumber := inode.RootDirInodeNumber
	for _, segment := range pathSegments {
		cursorInodeLock, err1 := vS.inodeVolumeHandle.InitInodeLock(cursorInodeNumber, nil)
		if err = err1; err != nil {
			return
		}
		err = cursorInodeLock.ReadLock()
		if err != nil {
			return
		}

		// Need search (X_OK) permission on each intermediate directory.
		if !vS.inodeVolumeHandle.Access(cursorInodeNumber, userID, groupID, otherGroupIDs, inode.X_OK,
			inode.NoOverride) {
			cursorInodeLock.Unlock()
			err = blunder.NewError(blunder.PermDeniedError, "EACCES")
			return
		}

		cursorInodeNumber, err = vS.inodeVolumeHandle.Lookup(cursorInodeNumber, segment)
		cursorInodeLock.Unlock()

		if err != nil {
			return cursorInodeNumber, err
		}
	}

	return cursorInodeNumber, nil
}

// MiddlewareCoalesce combines the contents of the files at elementPaths into
// the (possibly freshly created, then truncated) file at destPath.
func (vS *volumeStruct) MiddlewareCoalesce(destPath string, metaData []byte, elementPaths []string) (
	ino uint64, numWrites uint64, attrChangeTime uint64,
	modificationTime uint64, err error) {

	var (
		coalesceElementList          []*inode.CoalesceElement
		coalesceSize                 uint64
		ctime                        time.Time
		destFileInodeNumber          inode.InodeNumber
		dirEntryBasename             string
		dirEntryInodeNumber          inode.InodeNumber
		dirInodeNumber               inode.InodeNumber
		elementPathIndex             int
		elementPathIndexAtChunkEnd   int
		elementPathIndexAtChunkStart int
		heldLocks                    *heldLocksStruct
		mtime                        time.Time
		retryRequired                bool
		tryLockBackoffContext        *tryLockBackoffContextStruct
	)

	// Record per-call latency, bytes coalesced, and error count for stats.
	startTime := time.Now()
	defer func() {
		globals.MiddlewareCoalesceUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		globals.MiddlewareCoalesceBytes.Add(coalesceSize)
		if err != nil {
			globals.MiddlewareCoalesceErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	// First create the destination file if necessary and ensure that it is empty

	tryLockBackoffContext = &tryLockBackoffContextStruct{}

RestartDestinationFileCreation:

	// Backoff is ZERO on the first pass; grows on each lock-contention retry.
	tryLockBackoffContext.backoff()

	heldLocks = newHeldLocks()

	_, destFileInodeNumber, _, _, retryRequired, err =
		vS.resolvePath(
			inode.RootDirInodeNumber,
			destPath,
			heldLocks,
			resolvePathFollowDirEntrySymlinks|
				resolvePathFollowDirSymlinks|
				resolvePathCreateMissingPathElements|
				resolvePathRequireExclusiveLockOnDirEntryInode)

	if nil != err {
		heldLocks.free()
		return
	}

	if retryRequired {
		heldLocks.free()
		goto RestartDestinationFileCreation
	}

	// Truncate the destination file to empty.
	// NOTE(review): SetSize's return value is discarded here — confirm intentional.
	vS.inodeVolumeHandle.SetSize(destFileInodeNumber, 0)

	heldLocks.free()

	// Now setup for looping through elementPaths with fresh locks
	// every globals.coalesceElementChunkSize elements holding an
	// Exclusive Lock on each FileInode and their containing DirInode

	elementPathIndexAtChunkStart = 0

	for elementPathIndexAtChunkStart < len(elementPaths) {
		elementPathIndexAtChunkEnd = elementPathIndexAtChunkStart + int(globals.coalesceElementChunkSize)
		if elementPathIndexAtChunkEnd > len(elementPaths) {
			elementPathIndexAtChunkEnd = len(elementPaths)
		}

		// Coalesce elementPaths[elementPathIndexAtChunkStart:elementPathIndexAtChunkEnd)

		tryLockBackoffContext = &tryLockBackoffContextStruct{}

	RestartCoalesceChunk:

		tryLockBackoffContext.backoff()

		heldLocks = newHeldLocks()

		coalesceElementList = make([]*inode.CoalesceElement, 0, (elementPathIndexAtChunkEnd - elementPathIndexAtChunkStart))

		// Resolve (and exclusively lock) every element in this chunk.
		for elementPathIndex = elementPathIndexAtChunkStart; elementPathIndex < elementPathIndexAtChunkEnd; elementPathIndex++ {
			dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, _, retryRequired, err =
				vS.resolvePath(
					inode.RootDirInodeNumber,
					elementPaths[elementPathIndex],
					heldLocks,
					resolvePathFollowDirSymlinks|
						resolvePathRequireExclusiveLockOnDirEntryInode|
						resolvePathRequireExclusiveLockOnDirInode)

			if nil != err {
				heldLocks.free()
				return
			}

			if retryRequired {
				heldLocks.free()
				goto RestartCoalesceChunk
			}

			coalesceElementList = append(coalesceElementList, &inode.CoalesceElement{
				ContainingDirectoryInodeNumber: dirInodeNumber,
				ElementInodeNumber:             dirEntryInodeNumber,
				ElementName:                    dirEntryBasename,
			})
		}

		// Re-resolve (and exclusively lock) the destination for this chunk.
		_, destFileInodeNumber, _, _, retryRequired, err =
			vS.resolvePath(
				inode.RootDirInodeNumber,
				destPath,
				heldLocks,
				resolvePathFollowDirEntrySymlinks|
					resolvePathFollowDirSymlinks|
					resolvePathRequireExclusiveLockOnDirEntryInode)

		if nil != err {
			heldLocks.free()
			return
		}

		if retryRequired {
			heldLocks.free()
			goto RestartCoalesceChunk
		}

		ctime, mtime, numWrites, coalesceSize, err = vS.inodeVolumeHandle.Coalesce(
			destFileInodeNumber, MiddlewareStream, metaData, coalesceElementList)

		heldLocks.free()

		if nil != err {
			return
		}

		elementPathIndexAtChunkStart = elementPathIndexAtChunkEnd
	}

	// Regardless of err return, fill in other return values

	ino = uint64(destFileInodeNumber)
	attrChangeTime = uint64(ctime.UnixNano())
	modificationTime = uint64(mtime.UnixNano())

	return
}

// MiddlewareDelete removes parentDir/basename, destroying the underlying
// inode when it is no longer referenced (empty dir or last link to a file).
func (vS *volumeStruct) MiddlewareDelete(parentDir string, basename string) (err error) {
	var (
		dirEntryBasename      string
		dirEntryInodeNumber   inode.InodeNumber
		dirInodeNumber        inode.InodeNumber
		doDestroy             bool
		heldLocks             *heldLocksStruct
		inodeType             inode.InodeType
		inodeVolumeHandle     inode.VolumeHandle
		linkCount             uint64
		numDirEntries         uint64
		retryRequired         bool
		toDestroyInodeNumber  inode.InodeNumber
		tryLockBackoffContext *tryLockBackoffContextStruct
	)

	// Record per-call latency and error count for stats.
	startTime := time.Now()
	defer func() {
		globals.MiddlewareDeleteUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.MiddlewareDeleteErrors.Add(1)
		}
	}()

	// Retry until done or failure (starting with ZERO backoff)

	tryLockBackoffContext = &tryLockBackoffContextStruct{}

Restart:

	// Perform backoff and update for each restart (starting with ZERO backoff of course)

	tryLockBackoffContext.backoff()

	// Construct fresh heldLocks for this restart

	heldLocks = newHeldLocks()

	dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, _, retryRequired, err =
		vS.resolvePath(
			inode.RootDirInodeNumber,
			parentDir+"/"+basename,
			heldLocks,
			resolvePathFollowDirSymlinks|
				resolvePathRequireExclusiveLockOnDirEntryInode|
				resolvePathRequireExclusiveLockOnDirInode)

	if nil != err {
		heldLocks.free()
		return
1587 } 1588 1589 if retryRequired { 1590 heldLocks.free() 1591 goto Restart 1592 } 1593 1594 // Check if Unlink() and Destroy() are doable 1595 1596 inodeVolumeHandle = vS.inodeVolumeHandle 1597 1598 inodeType, err = inodeVolumeHandle.GetType(dirEntryInodeNumber) 1599 if nil != err { 1600 heldLocks.free() 1601 return 1602 } 1603 1604 if inode.DirType == inodeType { 1605 numDirEntries, err = inodeVolumeHandle.NumDirEntries(dirEntryInodeNumber) 1606 if nil != err { 1607 heldLocks.free() 1608 return 1609 } 1610 1611 if 2 != numDirEntries { 1612 heldLocks.free() 1613 err = blunder.NewError(blunder.NotEmptyError, "%s/%s not empty", parentDir, basename) 1614 return 1615 } 1616 1617 doDestroy = true 1618 } else { 1619 linkCount, err = inodeVolumeHandle.GetLinkCount(dirEntryInodeNumber) 1620 if nil != err { 1621 heldLocks.free() 1622 return 1623 } 1624 1625 doDestroy = (1 == linkCount) 1626 } 1627 1628 // Now perform the Unlink() and (potentially) Destroy() 1629 1630 toDestroyInodeNumber, err = inodeVolumeHandle.Unlink(dirInodeNumber, dirEntryBasename, false) 1631 if nil != err { 1632 heldLocks.free() 1633 return 1634 } 1635 1636 if doDestroy && (inode.InodeNumber(0) != toDestroyInodeNumber) { 1637 err = inodeVolumeHandle.Destroy(toDestroyInodeNumber) 1638 if nil != err { 1639 logger.Errorf("fs.MiddlewareDelete() failed to Destroy dirEntryInodeNumber 0x%016X: %v", dirEntryInodeNumber, err) 1640 } 1641 } 1642 1643 // Release heldLocks and exit with success (even if Destroy() failed earlier) 1644 1645 heldLocks.free() 1646 1647 err = nil 1648 return 1649 } 1650 1651 func (vS *volumeStruct) middlewareReadDirHelper(path string, maxEntries uint64, prevBasename string) (pathDirInodeNumber inode.InodeNumber, dirEntrySlice []inode.DirEntry, moreEntries bool, err error) { 1652 var ( 1653 dirEntrySliceElement inode.DirEntry 1654 heldLocks *heldLocksStruct 1655 internalDirEntrySlice []inode.DirEntry 1656 retryRequired bool 1657 tryLockBackoffContext *tryLockBackoffContextStruct 1658 
) 1659 1660 // Retry until done or failure (starting with ZERO backoff) 1661 1662 tryLockBackoffContext = &tryLockBackoffContextStruct{} 1663 1664 Restart: 1665 1666 // Perform backoff and update for each restart (starting with ZERO backoff of course) 1667 1668 tryLockBackoffContext.backoff() 1669 1670 // Construct fresh heldLocks for this restart 1671 1672 heldLocks = newHeldLocks() 1673 1674 _, pathDirInodeNumber, _, _, retryRequired, err = 1675 vS.resolvePath( 1676 inode.RootDirInodeNumber, 1677 path, 1678 heldLocks, 1679 resolvePathFollowDirSymlinks) 1680 1681 if nil != err { 1682 heldLocks.free() 1683 return 1684 } 1685 1686 if retryRequired { 1687 heldLocks.free() 1688 goto Restart 1689 } 1690 1691 // Now assemble response 1692 1693 internalDirEntrySlice, moreEntries, err = vS.inodeVolumeHandle.ReadDir(pathDirInodeNumber, maxEntries, 0, prevBasename) 1694 if nil != err { 1695 heldLocks.free() 1696 return 1697 } 1698 1699 // No need to hold any locks now... directory contents should be allowed to change while enumerating 1700 heldLocks.free() 1701 1702 dirEntrySlice = make([]inode.DirEntry, 0, len(internalDirEntrySlice)) 1703 1704 for _, dirEntrySliceElement = range internalDirEntrySlice { 1705 if ("." == dirEntrySliceElement.Basename) || (".." == dirEntrySliceElement.Basename) { 1706 dirEntrySliceElement.Type = inode.DirType 1707 } else { 1708 dirEntrySliceElement.Type, err = vS.GetType(inode.InodeRootUserID, inode.InodeGroupID(0), nil, dirEntrySliceElement.InodeNumber) 1709 if nil != err { 1710 // It's ok to have an error here... 
it just means the directory we are iterating is changing 1711 continue 1712 } 1713 } 1714 dirEntrySlice = append(dirEntrySlice, dirEntrySliceElement) 1715 } 1716 1717 dirEntrySlice = dirEntrySlice[:len(dirEntrySlice)] 1718 1719 err = nil 1720 return 1721 } 1722 1723 func (vS *volumeStruct) MiddlewareGetAccount(maxEntries uint64, marker string, endmarker string) (accountEnts []AccountEntry, mtime uint64, ctime uint64, err error) { 1724 var ( 1725 dirEntrySlice []inode.DirEntry 1726 dirEntrySliceElement inode.DirEntry 1727 remainingMaxEntries uint64 1728 moreEntries bool 1729 statResult Stat 1730 ) 1731 1732 statResult, err = vS.Getstat(inode.InodeRootUserID, inode.InodeGroupID(0), nil, inode.RootDirInodeNumber) 1733 if nil != err { 1734 return 1735 } 1736 mtime = statResult[StatMTime] 1737 ctime = statResult[StatCTime] 1738 1739 if 0 != maxEntries { 1740 // Hard limit to number of DirInode Basenames to return 1741 accountEnts = make([]AccountEntry, 0, maxEntries) 1742 } 1743 1744 remainingMaxEntries = maxEntries 1745 1746 moreEntries = true 1747 1748 for moreEntries { 1749 _, dirEntrySlice, moreEntries, err = vS.middlewareReadDirHelper("/", remainingMaxEntries, marker) 1750 if nil != err { 1751 return 1752 } 1753 1754 if 0 == maxEntries { 1755 // No limit to number of DirInode Basenames to return... so it must be <= len(dirEntrySlice) 1756 accountEnts = make([]AccountEntry, 0, len(dirEntrySlice)) 1757 // Note: moreEntries should be false so the "for moreEntries" loop should exit after 1st iteration 1758 } 1759 1760 for _, dirEntrySliceElement = range dirEntrySlice { 1761 if ("" != endmarker) && (0 <= strings.Compare(dirEntrySliceElement.Basename, endmarker)) { 1762 moreEntries = false 1763 break 1764 } 1765 if ("." != dirEntrySliceElement.Basename) && (".." != dirEntrySliceElement.Basename) { 1766 // So we've skipped "." & ".." 
- now also skip non-DirInodes 1767 if inode.DirType == dirEntrySliceElement.Type { 1768 statResult, err = vS.Getstat(inode.InodeRootUserID, inode.InodeGroupID(0), nil, dirEntrySliceElement.InodeNumber) 1769 if nil != err { 1770 return 1771 } 1772 accountEnts = append(accountEnts, AccountEntry{ 1773 Basename: dirEntrySliceElement.Basename, 1774 ModificationTime: statResult[StatMTime], 1775 AttrChangeTime: statResult[StatCTime], 1776 }) 1777 } 1778 } 1779 } 1780 1781 if moreEntries && (0 != maxEntries) { 1782 remainingMaxEntries = maxEntries - uint64(len(accountEnts)) 1783 if 0 == remainingMaxEntries { 1784 moreEntries = false 1785 } 1786 } 1787 1788 if moreEntries { 1789 // Adjust marker to fetch next dirEntrySlice 1790 marker = dirEntrySlice[len(dirEntrySlice)-1].Basename 1791 } 1792 } 1793 1794 accountEnts = accountEnts[:len(accountEnts)] 1795 1796 return 1797 } 1798 1799 type dirEntrySliceStackElementStruct struct { 1800 dirPath string 1801 dirEntrySlice []inode.DirEntry 1802 numConsumed int 1803 moreEntries bool 1804 } 1805 1806 func (vS *volumeStruct) MiddlewareGetContainer(vContainerName string, maxEntries uint64, marker string, endmarker string, prefix string, delimiter string) (containerEnts []ContainerEntry, err error) { 1807 var ( 1808 containerEntry ContainerEntry 1809 containerEntryBasename string // Misnamed... 
this is actually everything after ContainerName 1810 containerEntryPath string 1811 containerEntryPathSplit []string // Split on only the first '/' (to remove ContainerName from it) 1812 doSingleDirectory bool 1813 dirEntryInodeLock *dlm.RWLockStruct 1814 dirEntryInodeNumber inode.InodeNumber 1815 dirEntryInodeType inode.InodeType 1816 dirEntryMetadata *inode.MetadataStruct 1817 dirEntryPath string 1818 dirEntrySlice []inode.DirEntry 1819 dirEntrySliceElement inode.DirEntry 1820 dirEntrySliceElementIndex int 1821 dirEntrySliceElementToPrepend *inode.DirEntry 1822 dirEntrySliceStack []*dirEntrySliceStackElementStruct 1823 dirEntrySliceStackElement *dirEntrySliceStackElementStruct 1824 dirEntrySliceToAppend []inode.DirEntry 1825 dirInodeNumber inode.InodeNumber 1826 dirPath string 1827 dirPathSplit []string 1828 dlmCallerID dlm.CallerID 1829 endmarkerCanonicalized string 1830 endmarkerPath []string 1831 heldLocks *heldLocksStruct 1832 initialDirEntryToMatch string // == "" if no initial path should be returned (i.e. 
in marker starting point case) 1833 inodeVolumeHandle inode.VolumeHandle 1834 markerCanonicalized string 1835 markerPath []string 1836 markerPathDirInodeIndex int 1837 moreEntries bool 1838 pathIndex int 1839 prefixCanonicalized string 1840 prefixPath []string 1841 prefixPathDirInodeIndex int 1842 prevReturned string 1843 remainingMaxEntries uint64 1844 retryRequired bool 1845 tryLockBackoffContext *tryLockBackoffContextStruct 1846 ) 1847 1848 // Validate marker, endmarker, and prefix 1849 1850 if "" == marker { 1851 markerPath = []string{} 1852 markerPathDirInodeIndex = -1 // Must be special cased below to ensure we don't look in markerPath 1853 markerCanonicalized = "" // Actually never accessed 1854 } else { 1855 markerPath, markerPathDirInodeIndex, err = vS.canonicalizePathAndLocateLeafDirInode(vContainerName + "/" + marker) 1856 if nil != err { 1857 err = blunder.AddError(err, blunder.InvalidArgError) 1858 return 1859 } 1860 1861 markerCanonicalized = strings.Join(markerPath, "/") 1862 if strings.HasSuffix(marker, "/") { 1863 markerCanonicalized += "/" 1864 } 1865 1866 if vContainerName+"/"+marker != markerCanonicalized { 1867 err = blunder.NewError(blunder.InvalidArgError, "MiddlewareGetContainer() only supports a canonicalized marker") 1868 return 1869 } 1870 } 1871 1872 if "" == endmarker { 1873 endmarkerPath = []string{} 1874 endmarkerCanonicalized = "" // Actually never accessed 1875 } else { 1876 endmarkerPath, _, err = vS.canonicalizePathAndLocateLeafDirInode(vContainerName + "/" + endmarker) 1877 if nil != err { 1878 err = blunder.AddError(err, blunder.InvalidArgError) 1879 return 1880 } 1881 1882 endmarkerCanonicalized = strings.Join(endmarkerPath, "/") 1883 if strings.HasSuffix(endmarker, "/") { 1884 endmarkerCanonicalized += "/" 1885 } 1886 1887 if vContainerName+"/"+endmarker != endmarkerCanonicalized { 1888 err = blunder.NewError(blunder.InvalidArgError, "MiddlewareGetContainer() only supports a canonicalized endmarker") 1889 return 1890 } 1891 } 
1892 1893 prefixPath, prefixPathDirInodeIndex, err = vS.canonicalizePathAndLocateLeafDirInode(vContainerName + "/" + prefix) 1894 if nil != err { 1895 err = blunder.AddError(err, blunder.InvalidArgError) 1896 return 1897 } 1898 if prefixPathDirInodeIndex < 0 { 1899 err = blunder.NewError(blunder.NotFoundError, "MiddlewareGetContainer() only supports querying an existing Container") 1900 return 1901 } 1902 1903 prefixCanonicalized = strings.Join(prefixPath, "/") 1904 if strings.HasSuffix(prefix, "/") { 1905 prefixCanonicalized += "/" 1906 } 1907 1908 if (prefix != "") && (vContainerName+"/"+prefix != prefixCanonicalized) { 1909 err = blunder.NewError(blunder.InvalidArgError, "MiddlewareGetContainer() only supports a canonicalized prefix") 1910 return 1911 } 1912 1913 // Validate delimiter 1914 1915 switch delimiter { 1916 case "": 1917 doSingleDirectory = false 1918 case "/": 1919 doSingleDirectory = true 1920 default: 1921 err = blunder.NewError(blunder.InvalidArgError, "MiddlewareGetContainer() only supports a delimiter of \"/\"") 1922 return 1923 } 1924 1925 // Determine what DirInode from which to begin our enumeration 1926 1927 pathIndex = 0 1928 1929 for { 1930 if (pathIndex > markerPathDirInodeIndex) && (pathIndex > prefixPathDirInodeIndex) { 1931 // Special (though probably typical) case where marker lands in prefix-indicated directory 1932 1933 dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex+1], "/") 1934 1935 if (1 == len(prefixPath)) || strings.HasSuffix(prefix, "/") { 1936 if (markerPathDirInodeIndex + 1) == len(markerPath) { 1937 prevReturned = "" 1938 } else { 1939 prevReturned = markerPath[markerPathDirInodeIndex+1] 1940 } 1941 initialDirEntryToMatch = "" 1942 } else { 1943 // Handle four remaining cases: 1944 // marker & prefix both specified directories 1945 // marker specified a directory, prefix did not 1946 // prefix specified a directory, marker did not 1947 // neither marker nor prefix specified a directory 1948 1949 if 
(markerPathDirInodeIndex + 1) == len(markerPath) { 1950 if (prefixPathDirInodeIndex + 1) == len(prefixPath) { 1951 // Case where marker & prefix both specified directories 1952 1953 prevReturned = "" 1954 } else { 1955 // Case where marker specified a directory, prefix did not 1956 1957 prevReturned = prefixPath[prefixPathDirInodeIndex+1] 1958 } 1959 initialDirEntryToMatch = prevReturned 1960 } else { // (markerPathDirInodeIndex + 1) != len(markerPath) 1961 if (prefixPathDirInodeIndex + 1) == len(prefixPath) { 1962 // Case where prefix specified a directory, marker did not 1963 1964 prevReturned = markerPath[markerPathDirInodeIndex+1] 1965 initialDirEntryToMatch = "" 1966 } else { 1967 // Case where neither marker nor prefix specified a directory 1968 1969 if strings.Compare(prefixPath[prefixPathDirInodeIndex+1], markerPath[markerPathDirInodeIndex+1]) <= 0 { 1970 prevReturned = markerPath[markerPathDirInodeIndex+1] 1971 initialDirEntryToMatch = "" 1972 } else { 1973 prevReturned = prefixPath[prefixPathDirInodeIndex+1] 1974 initialDirEntryToMatch = prevReturned 1975 } 1976 } 1977 } 1978 } 1979 break 1980 } 1981 1982 if pathIndex > markerPathDirInodeIndex { 1983 // Handle case where prefix is more constraining than marker 1984 1985 if prefixPathDirInodeIndex == (len(prefixPath) - 1) { 1986 if (1 == len(prefixPath)) || strings.HasSuffix(prefix, "/") { 1987 dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex+1], "/") 1988 prevReturned = "" 1989 } else { 1990 dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex], "/") 1991 prevReturned = prefixPath[len(prefixPath)-1] 1992 } 1993 } else { 1994 dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex+1], "/") 1995 prevReturned = prefixPath[len(prefixPath)-1] 1996 } 1997 initialDirEntryToMatch = prevReturned 1998 break 1999 } 2000 2001 if pathIndex > prefixPathDirInodeIndex { 2002 // Handle case where marker is more constraining than prefix 2003 2004 dirPath = 
strings.Join(markerPath[:markerPathDirInodeIndex+1], "/") 2005 if markerPathDirInodeIndex == (len(markerPath) - 1) { 2006 prevReturned = "" 2007 } else { 2008 prevReturned = markerPath[len(markerPath)-1] 2009 } 2010 initialDirEntryToMatch = "" 2011 break 2012 } 2013 2014 switch strings.Compare(prefixPath[pathIndex], markerPath[pathIndex]) { 2015 case -1: 2016 dirPath = strings.Join(markerPath[:markerPathDirInodeIndex+1], "/") 2017 if markerPathDirInodeIndex == (len(markerPath) - 1) { 2018 prevReturned = "" 2019 } else { 2020 prevReturned = markerPath[len(markerPath)-1] 2021 } 2022 initialDirEntryToMatch = "" 2023 break 2024 case 0: 2025 pathIndex++ 2026 case 1: 2027 if prefixPathDirInodeIndex == (len(prefixPath) - 1) { 2028 if (1 == len(prefixPath)) || strings.HasSuffix(prefix, "/") { 2029 dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex+1], "/") 2030 prevReturned = "" 2031 } else { 2032 dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex], "/") 2033 prevReturned = prefixPath[len(prefixPath)-1] 2034 } 2035 } else { 2036 dirPath = strings.Join(prefixPath[:prefixPathDirInodeIndex+1], "/") 2037 prevReturned = prefixPath[len(prefixPath)-1] 2038 } 2039 initialDirEntryToMatch = prevReturned 2040 break 2041 } 2042 } 2043 2044 // Setup shortcuts/contants 2045 2046 dlmCallerID = dlm.GenerateCallerID() 2047 inodeVolumeHandle = vS.inodeVolumeHandle 2048 2049 // Compute initial response 2050 2051 tryLockBackoffContext = &tryLockBackoffContextStruct{} 2052 2053 Restart: 2054 2055 tryLockBackoffContext.backoff() 2056 2057 heldLocks = newHeldLocks() 2058 2059 _, dirInodeNumber, _, _, retryRequired, err = 2060 vS.resolvePath( 2061 inode.RootDirInodeNumber, 2062 dirPath, 2063 heldLocks, 2064 resolvePathDirEntryInodeMustBeDirectory) 2065 if nil != err { 2066 heldLocks.free() 2067 return 2068 } 2069 if retryRequired { 2070 heldLocks.free() 2071 goto Restart 2072 } 2073 2074 containerEnts = make([]ContainerEntry, 0, maxEntries) 2075 2076 if 0 == maxEntries { 2077 
heldLocks.free() 2078 err = nil 2079 return 2080 } 2081 2082 if "" == initialDirEntryToMatch { 2083 dirEntrySliceElementToPrepend = nil 2084 } else { 2085 if "" == dirPath { 2086 dirEntryPath = initialDirEntryToMatch 2087 } else { 2088 dirEntryPath = dirPath + "/" + initialDirEntryToMatch 2089 } 2090 if ("" != endmarker) && (strings.Compare(dirEntryPath, endmarkerCanonicalized) >= 0) { 2091 heldLocks.free() 2092 err = nil 2093 return 2094 } 2095 dirEntryInodeNumber, err = inodeVolumeHandle.Lookup(dirInodeNumber, initialDirEntryToMatch) 2096 if nil == err { 2097 retryRequired = heldLocks.attemptSharedLock(inodeVolumeHandle, dlmCallerID, dirEntryInodeNumber) 2098 if retryRequired { 2099 heldLocks.free() 2100 goto Restart 2101 } 2102 dirEntryInodeType, err = inodeVolumeHandle.GetType(dirEntryInodeNumber) 2103 if nil == err { 2104 dirEntrySliceElementToPrepend = &inode.DirEntry{ 2105 InodeNumber: dirEntryInodeNumber, 2106 Basename: initialDirEntryToMatch, 2107 Type: dirEntryInodeType, 2108 } 2109 } else { 2110 dirEntrySliceElementToPrepend = nil 2111 } 2112 heldLocks.unlock(dirEntryInodeNumber) 2113 } else { 2114 dirEntrySliceElementToPrepend = nil 2115 } 2116 } 2117 2118 heldLocks.free() 2119 2120 if 0 == maxEntries { 2121 remainingMaxEntries = 0 2122 } else { 2123 if nil == dirEntrySliceElementToPrepend { 2124 remainingMaxEntries = maxEntries 2125 } else { 2126 remainingMaxEntries = maxEntries - 1 2127 } 2128 } 2129 2130 // At this point: 2131 // no heldLocks 2132 // containerEnts has been declared 2133 // doSingleDirectory is set based on supplied delimiter 2134 // if {marker,endmarker,prefix} asked to include an exact matched path that existed, it's in dirEntrySliceElementToPrepend 2135 // prefixCanonicalized & endmarkerCanonicalized are set to terminate the ensuing treewalk 2136 // remainingMaxEntries indicates how many more DirEntry's will fit in containerEnts (if capped) 2137 // dirPath is pointing to the initial DirInode to read 2138 // prevReturned indicates 
from where in the DirInode to start reading 2139 2140 // Perform initial ReadDir and place in dirEntrySliceStack 2141 2142 if nil == dirEntrySliceElementToPrepend { 2143 _, dirEntrySlice, moreEntries, err = vS.middlewareReadDirHelper(dirPath, remainingMaxEntries, prevReturned) 2144 if nil != err { 2145 return 2146 } 2147 } else { 2148 if 0 == remainingMaxEntries { 2149 dirEntrySlice = []inode.DirEntry{*dirEntrySliceElementToPrepend} 2150 moreEntries = false 2151 } else { 2152 _, dirEntrySliceToAppend, moreEntries, err = vS.middlewareReadDirHelper(dirPath, remainingMaxEntries, prevReturned) 2153 if nil == err { 2154 dirEntrySlice = make([]inode.DirEntry, 1, 1+len(dirEntrySliceToAppend)) 2155 dirEntrySlice[0] = *dirEntrySliceElementToPrepend 2156 dirEntrySlice = append(dirEntrySlice, dirEntrySliceToAppend...) 2157 } else { 2158 return 2159 } 2160 } 2161 } 2162 2163 dirEntrySliceStackElement = &dirEntrySliceStackElementStruct{ 2164 dirPath: dirPath, 2165 dirEntrySlice: dirEntrySlice, 2166 numConsumed: 0, 2167 moreEntries: moreEntries, 2168 } 2169 2170 dirEntrySliceStack = []*dirEntrySliceStackElementStruct{dirEntrySliceStackElement} 2171 2172 containerEnts = make([]ContainerEntry, 0, len(dirEntrySlice)) 2173 2174 // Now append appropriate ContainerEntry's until exit criteria is reached 2175 2176 for uint64(len(containerEnts)) < maxEntries { 2177 dirEntrySliceStackElement = dirEntrySliceStack[len(dirEntrySliceStack)-1] 2178 2179 if dirEntrySliceStackElement.numConsumed == len(dirEntrySliceStackElement.dirEntrySlice) { 2180 if dirEntrySliceStackElement.moreEntries { 2181 dirPath = dirEntrySliceStackElement.dirPath 2182 dirEntrySlice = dirEntrySliceStackElement.dirEntrySlice 2183 dirEntrySliceElementIndex = len(dirEntrySlice) - 1 2184 dirEntrySliceElement = dirEntrySlice[dirEntrySliceElementIndex] 2185 prevReturned = dirEntrySliceElement.Basename 2186 2187 _, dirEntrySlice, moreEntries, err = vS.middlewareReadDirHelper(dirPath, remainingMaxEntries, prevReturned) 2188 if 
(nil != err) || (0 == len(dirEntrySlice)) { 2189 // Even though we thought there were moreEntries, there now are not for some reason 2190 2191 if doSingleDirectory { 2192 // Regardless of remaining contents of dirEntrySliceStack, we must be done 2193 2194 err = nil 2195 return 2196 } 2197 2198 // Navigate to parent directory 2199 2200 dirEntrySliceStack = dirEntrySliceStack[:len(dirEntrySliceStack)-1] 2201 continue 2202 } 2203 2204 // Restart this loop on current dirEntrySliceStackElement with new middlewareReadDirHelper() results 2205 2206 dirEntrySliceStackElement.dirEntrySlice = dirEntrySlice 2207 dirEntrySliceStackElement.numConsumed = 0 2208 dirEntrySliceStackElement.moreEntries = moreEntries 2209 2210 continue 2211 } else { 2212 // We've reached the end of this DirInode 2213 2214 if doSingleDirectory { 2215 // Regardless of remaining contents of dirEntrySliceStack, we must be done 2216 2217 err = nil 2218 return 2219 } 2220 2221 // Navigate to parent directory (staying within this Container) 2222 2223 if 1 == len(dirEntrySliceStack) { 2224 // We are at the starting directory 2225 2226 dirPathSplit = strings.Split(dirEntrySliceStackElement.dirPath, "/") 2227 2228 if 1 == len(dirPathSplit) { 2229 // We just finished Container-level directory, so we are done 2230 2231 err = nil 2232 return 2233 } 2234 2235 // Modify dirEntrySliceStackElement to point to parent directory as if we'd just processed the dirEntry of this directory 2236 2237 dirPath = strings.Join(dirPathSplit[:len(dirPathSplit)-1], "/") 2238 2239 if 0 == maxEntries { 2240 remainingMaxEntries = 0 2241 } else { 2242 remainingMaxEntries = maxEntries - uint64(len(containerEnts)) 2243 } 2244 2245 prevReturned = dirPathSplit[len(dirPathSplit)-1] 2246 2247 _, dirEntrySlice, moreEntries, err = vS.middlewareReadDirHelper(dirPath, remainingMaxEntries, prevReturned) 2248 if nil != err { 2249 return 2250 } 2251 2252 dirEntrySliceStackElement.dirPath = dirPath 2253 dirEntrySliceStackElement.dirEntrySlice = 
dirEntrySlice 2254 dirEntrySliceStackElement.numConsumed = 0 2255 dirEntrySliceStackElement.moreEntries = moreEntries 2256 } else { 2257 // Parent directory already in dirEntrySliceStack... so just pop current ...Element 2258 2259 dirEntrySliceStack = dirEntrySliceStack[:len(dirEntrySliceStack)-1] 2260 } 2261 2262 continue 2263 } 2264 } 2265 2266 // Consume next dirEntrySliceElement 2267 // ...skipping "." and ".." 2268 // ...skipping if <dirPath>/<Basename> <= marker 2269 // ...recursing when encountering DirInode's if !doSingleDirectory 2270 // ...terminating early if either: 2271 // len(*containerEnts) reaches maxEntries 2272 // <dirPath>/<Basename> >= endmarker 2273 // <dirPath>/<Basename> does not start with prefix 2274 2275 dirEntrySlice = dirEntrySliceStackElement.dirEntrySlice 2276 dirEntrySliceElementIndex = dirEntrySliceStackElement.numConsumed 2277 dirEntrySliceElement = dirEntrySlice[dirEntrySliceElementIndex] 2278 2279 dirEntrySliceStackElement.numConsumed++ 2280 2281 if ("." == dirEntrySliceElement.Basename) || (".." == dirEntrySliceElement.Basename) { 2282 continue 2283 } 2284 2285 containerEntryPath = dirEntrySliceStackElement.dirPath + "/" + dirEntrySliceElement.Basename 2286 2287 if ("" != marker) && (strings.Compare(containerEntryPath, markerCanonicalized) <= 0) { 2288 err = nil 2289 return 2290 } 2291 if ("" != endmarker) && (strings.Compare(containerEntryPath, endmarkerCanonicalized) >= 0) { 2292 err = nil 2293 return 2294 } 2295 if ("" != prefix) && !strings.HasPrefix(containerEntryPath, prefixCanonicalized) { 2296 err = nil 2297 return 2298 } 2299 2300 // Ok... 
so we actually want to append this entry to containerEnts 2301 2302 tryLockBackoffContext = &tryLockBackoffContextStruct{} 2303 2304 Retry: 2305 2306 tryLockBackoffContext.backoff() 2307 2308 dirEntryInodeLock, err = inodeVolumeHandle.AttemptReadLock(dirEntrySliceElement.InodeNumber, dlmCallerID) 2309 if nil != err { 2310 goto Retry 2311 } 2312 2313 dirEntryMetadata, err = inodeVolumeHandle.GetMetadata(dirEntrySliceElement.InodeNumber) 2314 if nil != err { 2315 // Ok... so it must have disappeared... just skip it 2316 2317 err = dirEntryInodeLock.Unlock() 2318 if nil != err { 2319 logger.Fatalf("Failure unlocking a held LockID %s: %v", dirEntryInodeLock.LockID, err) 2320 } 2321 2322 continue 2323 } 2324 2325 containerEntryPathSplit = strings.SplitN(containerEntryPath, "/", 2) 2326 containerEntryBasename = containerEntryPathSplit[1] 2327 2328 containerEntry = ContainerEntry{ 2329 Basename: containerEntryBasename, 2330 FileSize: dirEntryMetadata.Size, 2331 ModificationTime: uint64(dirEntryMetadata.ModificationTime.UnixNano()), 2332 AttrChangeTime: uint64(dirEntryMetadata.AttrChangeTime.UnixNano()), 2333 IsDir: (dirEntrySliceElement.Type == inode.DirType), 2334 NumWrites: dirEntryMetadata.NumWrites, 2335 InodeNumber: uint64(dirEntrySliceElement.InodeNumber), 2336 } 2337 2338 containerEntry.Metadata, err = inodeVolumeHandle.GetStream(dirEntrySliceElement.InodeNumber, MiddlewareStream) 2339 if nil != err { 2340 if blunder.Is(err, blunder.StreamNotFound) { 2341 // No MiddlewareStream... just make it appear empty 2342 2343 containerEntry.Metadata = []byte{} 2344 err = nil 2345 } else { 2346 // Ok... so it must have disappeared... 
// MiddlewareGetObject assembles a HEAD-style response (size, times, type,
// middleware metadata) for the object at containerObjectPath and appends a
// read plan to *readRangeOut covering either the ranges in readRangeIn or,
// when readRangeIn is empty, the entire file. Only regular files get a read
// plan; directories and symlinks report FileSize == 0 and no plan.
//
// The resolvePath()/backoff/Restart pattern retries lock acquisition from
// scratch (with fresh heldLocks) until it succeeds or fails hard.
//
// NOTE(review): the deferred stats closure dereferences readRangeOut, so the
// caller must pass a non-nil pointer — confirm against callers.
func (vS *volumeStruct) MiddlewareGetObject(containerObjectPath string,
	readRangeIn []ReadRangeIn, readRangeOut *[]inode.ReadPlanStep) (
	response HeadResponse, err error) {

	var (
		dirEntryInodeNumber   inode.InodeNumber
		fileOffset            uint64
		heldLocks             *heldLocksStruct
		inodeVolumeHandle     inode.VolumeHandle
		readPlan              []inode.ReadPlanStep
		readRangeInIndex      int
		retryRequired         bool
		stat                  Stat
		tryLockBackoffContext *tryLockBackoffContextStruct
	)

	// Record per-call latency/bytes/error stats; err is the named result so
	// the closure observes its final value.
	startTime := time.Now()
	defer func() {
		var totalReadBytes uint64
		for _, step := range *readRangeOut {
			totalReadBytes += step.Length
		}

		globals.MiddlewareGetObjectUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		globals.MiddlewareGetObjectBytes.Add(totalReadBytes)
		if err != nil {
			globals.MiddlewareGetObjectErrors.Add(1)
		}
	}()

	// Retry until done or failure (starting with ZERO backoff)

	tryLockBackoffContext = &tryLockBackoffContextStruct{}

Restart:

	// Perform backoff and update for each restart (starting with ZERO backoff of course)

	tryLockBackoffContext.backoff()

	// Construct fresh heldLocks for this restart

	heldLocks = newHeldLocks()

	// Resolve the path, following symlinks; on success heldLocks holds the
	// lock(s) needed for the stat/stream/readplan calls below.
	_, dirEntryInodeNumber, _, _, retryRequired, err =
		vS.resolvePath(
			inode.RootDirInodeNumber,
			containerObjectPath,
			heldLocks,
			resolvePathFollowDirEntrySymlinks|
				resolvePathFollowDirSymlinks)

	if nil != err {
		heldLocks.free()
		return
	}

	if retryRequired {
		heldLocks.free()
		goto Restart
	}

	// Now assemble response

	stat, err = vS.getstatHelperWhileLocked(dirEntryInodeNumber)
	if nil != err {
		heldLocks.free()
		return
	}

	response.FileSize = stat[StatSize]
	response.ModificationTime = stat[StatMTime]
	response.AttrChangeTime = stat[StatCTime]
	response.IsDir = (stat[StatFType] == uint64(inode.DirType))
	response.InodeNumber = dirEntryInodeNumber
	response.NumWrites = stat[StatNumWrites]

	// Swift thinks all directories have a size of 0 (and symlinks as well)
	if stat[StatFType] != uint64(inode.FileType) {
		response.FileSize = 0
	}

	// A missing MiddlewareStream is not an error — report empty metadata.
	response.Metadata, err = vS.inodeVolumeHandle.GetStream(dirEntryInodeNumber, MiddlewareStream)
	if nil != err {
		if blunder.Is(err, blunder.StreamNotFound) {
			response.Metadata = []byte{}
			err = nil
		} else {
			heldLocks.free()
			return
		}
	}

	// The only thing left is to construct a read plan and only regular
	// files have read plans. If this is not a regular file then we're
	// done.
	if stat[StatFType] != uint64(inode.FileType) {
		heldLocks.free()
		return
	}

	inodeVolumeHandle = vS.inodeVolumeHandle
	if len(readRangeIn) == 0 {
		// Get ReadPlan for entire file

		fileOffset = 0

		readPlan, err = inodeVolumeHandle.GetReadPlan(dirEntryInodeNumber, &fileOffset, &response.FileSize)
		if nil != err {
			heldLocks.free()
			return
		}

		_ = appendReadPlanEntries(readPlan, readRangeOut)
	} else { // len(readRangeIn) > 0
		// Append each computed range

		for readRangeInIndex = range readRangeIn {
			readPlan, err = inodeVolumeHandle.GetReadPlan(dirEntryInodeNumber, readRangeIn[readRangeInIndex].Offset, readRangeIn[readRangeInIndex].Len)
			if nil != err {
				heldLocks.free()
				return
			}

			_ = appendReadPlanEntries(readPlan, readRangeOut)
		}
	}

	heldLocks.free()

	err = nil
	return
}
&tryLockBackoffContextStruct{} 2557 2558 Restart: 2559 2560 // Perform backoff and update for each restart (starting with ZERO backoff of course) 2561 2562 tryLockBackoffContext.backoff() 2563 2564 // Construct fresh heldLocks for this restart 2565 2566 heldLocks = newHeldLocks() 2567 2568 _, dirEntryInodeNumber, _, _, retryRequired, err = 2569 vS.resolvePath( 2570 inode.RootDirInodeNumber, 2571 entityPath, 2572 heldLocks, 2573 resolvePathFollowDirEntrySymlinks| 2574 resolvePathFollowDirSymlinks) 2575 2576 if nil != err { 2577 heldLocks.free() 2578 return 2579 } 2580 2581 if retryRequired { 2582 heldLocks.free() 2583 goto Restart 2584 } 2585 2586 // Now assemble response 2587 2588 stat, err = vS.getstatHelperWhileLocked(dirEntryInodeNumber) 2589 if nil != err { 2590 heldLocks.free() 2591 return 2592 } 2593 2594 // since resolvePathFollowDirEntrySymlinks is set on the call to 2595 // resolvePath(), above, we'll never see a symlink returned 2596 response.ModificationTime = stat[StatMTime] 2597 response.AttrChangeTime = stat[StatCTime] 2598 response.FileSize = stat[StatSize] 2599 response.IsDir = (stat[StatFType] == uint64(inode.DirType)) 2600 response.InodeNumber = dirEntryInodeNumber 2601 response.NumWrites = stat[StatNumWrites] 2602 2603 // Swift thinks all directories have a size of 0 (and symlinks as well) 2604 if stat[StatFType] != uint64(inode.FileType) { 2605 response.FileSize = 0 2606 } 2607 2608 response.Metadata, err = vS.inodeVolumeHandle.GetStream(dirEntryInodeNumber, MiddlewareStream) 2609 if nil != err { 2610 heldLocks.free() 2611 response.Metadata = []byte{} 2612 // If someone makes a directory or file via SMB/FUSE and then 2613 // HEADs it via HTTP, we'll see this error. We treat it as 2614 // though there is no metadata. The middleware is equipped to 2615 // handle this case. 
// MiddlewarePost performs a compare-and-swap update of the middleware
// metadata stream on parentDir/baseName: the update is applied only if the
// currently stored stream equals oldMetaData; otherwise a TryAgainError is
// returned so the middleware can re-read and retry. Missing path elements
// are created, and the target is locked exclusively during the swap.
func (vS *volumeStruct) MiddlewarePost(parentDir string, baseName string, newMetaData []byte, oldMetaData []byte) (err error) {
	var (
		dirEntryInodeNumber   inode.InodeNumber
		existingStreamData    []byte
		heldLocks             *heldLocksStruct
		retryRequired         bool
		tryLockBackoffContext *tryLockBackoffContextStruct
	)

	// Per-call latency/bytes/error stats; err is the named result so the
	// closure observes its final value.
	startTime := time.Now()
	defer func() {
		globals.MiddlewarePostUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		globals.MiddlewarePostBytes.Add(uint64(len(newMetaData)))
		if err != nil {
			globals.MiddlewarePostErrors.Add(1)
		}
	}()

	// Retry until done or failure (starting with ZERO backoff)

	tryLockBackoffContext = &tryLockBackoffContextStruct{}

Restart:

	// Perform backoff and update for each restart (starting with ZERO backoff of course)

	tryLockBackoffContext.backoff()

	// Construct fresh heldLocks for this restart

	heldLocks = newHeldLocks()

	// Exclusive lock on the DirEntry inode is required since we may rewrite
	// its MiddlewareStream below.
	_, dirEntryInodeNumber, _, _, retryRequired, err =
		vS.resolvePath(
			inode.RootDirInodeNumber,
			parentDir+"/"+baseName,
			heldLocks,
			resolvePathFollowDirEntrySymlinks|
				resolvePathFollowDirSymlinks|
				resolvePathCreateMissingPathElements|
				resolvePathRequireExclusiveLockOnDirEntryInode)

	if nil != err {
		heldLocks.free()
		return
	}

	if retryRequired {
		heldLocks.free()
		goto Restart
	}

	// Now apply MiddlewareStream update

	// Compare oldMetaData to existing existingStreamData to make sure that the HTTP metadata has not changed.
	// If it has changed, then return an error since middleware has to handle it.

	existingStreamData, err = vS.inodeVolumeHandle.GetStream(dirEntryInodeNumber, MiddlewareStream)
	if nil != err {
		if blunder.Is(err, blunder.StreamNotFound) {
			// No stream yet — treat as empty so an empty oldMetaData matches.
			err = nil
			existingStreamData = make([]byte, 0)
		} else {
			heldLocks.free()
			return
		}
	}

	// Verify that the oldMetaData is the same as the one we think we are changing.

	if !bytes.Equal(existingStreamData, oldMetaData) {
		heldLocks.free()
		err = blunder.NewError(blunder.TryAgainError, "MiddlewarePost(): MetaData different - existingStreamData: %v OldMetaData: %v", existingStreamData, oldMetaData)
		return
	}

	// Change looks okay so make it.

	err = vS.inodeVolumeHandle.PutStream(dirEntryInodeNumber, MiddlewareStream, newMetaData)
	if nil != err {
		heldLocks.free()
		return
	}

	// PutStream() implicitly flushed... so, if it was a FileInode, we don't need to track it anymore

	vS.untrackInFlightFileInodeData(dirEntryInodeNumber, false)

	heldLocks.free()
	return
}
2740 if err != nil { 2741 globals.MiddlewarePutCompleteErrors.Add(1) 2742 } 2743 }() 2744 2745 // Validate (pObjectPaths,pObjectLengths) args 2746 2747 numPObjects = len(pObjectPaths) 2748 2749 if numPObjects != len(pObjectLengths) { 2750 blunder.NewError(blunder.InvalidArgError, "MiddlewarePutComplete() expects len(pObjectPaths) == len(pObjectLengths)") 2751 return 2752 } 2753 2754 // Retry until done or failure (starting with ZERO backoff) 2755 2756 tryLockBackoffContext = &tryLockBackoffContextStruct{} 2757 2758 Restart: 2759 2760 // Perform backoff and update for each restart (starting with ZERO backoff of course) 2761 2762 tryLockBackoffContext.backoff() 2763 2764 // Construct fresh heldLocks for this restart 2765 2766 heldLocks = newHeldLocks() 2767 2768 dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, dirEntryInodeType, retryRequired, err = 2769 vS.resolvePath( 2770 inode.RootDirInodeNumber, 2771 vContainerName+"/"+vObjectPath, 2772 heldLocks, 2773 resolvePathFollowDirEntrySymlinks| 2774 resolvePathFollowDirSymlinks| 2775 resolvePathCreateMissingPathElements| 2776 resolvePathRequireExclusiveLockOnDirInode| 2777 resolvePathRequireExclusiveLockOnDirEntryInode) 2778 if nil != err { 2779 heldLocks.free() 2780 return 2781 } 2782 if retryRequired { 2783 heldLocks.free() 2784 goto Restart 2785 } 2786 2787 // The semantics of PUT mean that the existing object is discarded; with 2788 // a file we can just overwrite it, but symlinks or directories must be 2789 // removed (if possible). 
2790 if dirEntryInodeType != inode.FileType { 2791 2792 if dirEntryInodeType == inode.DirType { 2793 2794 // try to unlink the directory (rmdir flushes the inodes) 2795 err = vS.rmdirActual(dirInodeNumber, dirEntryBasename, dirEntryInodeNumber) 2796 if err != nil { 2797 // the directory was probably not empty 2798 heldLocks.free() 2799 return 2800 2801 } 2802 2803 } else { 2804 // unlink the symlink (unlink flushes the inodes) 2805 err = vS.unlinkActual(dirInodeNumber, dirEntryBasename, dirEntryInodeNumber) 2806 if err != nil { 2807 2808 // ReadOnlyError is my best guess for the failure 2809 err = blunder.NewError(blunder.ReadOnlyError, 2810 "MiddlewareMkdir(): vol '%s' failed to unlink '%s': %v", 2811 vS.volumeName, vContainerName+"/"+vObjectPath, err) 2812 heldLocks.free() 2813 return 2814 } 2815 } 2816 2817 // let resolvePath() create the file 2818 dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, dirEntryInodeType, retryRequired, err = 2819 vS.resolvePath( 2820 inode.RootDirInodeNumber, 2821 vContainerName+"/"+vObjectPath, 2822 heldLocks, 2823 resolvePathFollowDirSymlinks| 2824 resolvePathCreateMissingPathElements| 2825 resolvePathDirEntryInodeMustBeFile| 2826 resolvePathRequireExclusiveLockOnDirInode| 2827 resolvePathRequireExclusiveLockOnDirEntryInode) 2828 if nil != err { 2829 heldLocks.free() 2830 return 2831 } 2832 if retryRequired { 2833 heldLocks.free() 2834 goto Restart 2835 } 2836 } 2837 2838 // Apply (pObjectPaths,pObjectLengths) to (erased) FileInode 2839 2840 inodeWroteTime = time.Now() 2841 2842 fileOffset = 0 2843 2844 for pObjectIndex = 0; pObjectIndex < numPObjects; pObjectIndex++ { 2845 _, containerName, objectName, err = utils.PathToAcctContObj(pObjectPaths[pObjectIndex]) 2846 if nil != err { 2847 heldLocks.free() 2848 logger.DebugfIDWithError(internalDebug, err, "MiddlewarePutComplete(): failed utils.PathToAcctContObj(\"%s\") for dirEntryInodeNumber 0x%016X", pObjectPaths[pObjectIndex], dirEntryInodeNumber) 2849 return 2850 } 2851 2852 
err = inodeVolumeHandle.Wrote( 2853 dirEntryInodeNumber, 2854 containerName, 2855 objectName, 2856 []uint64{fileOffset}, 2857 []uint64{0}, 2858 []uint64{pObjectLengths[pObjectIndex]}, 2859 inodeWroteTime, 2860 pObjectIndex > 0) // Initial pObjectIndex == 0 case will implicitly SetSize(,0) 2861 if nil != err { 2862 heldLocks.free() 2863 logger.DebugfIDWithError(internalDebug, err, "MiddlewarePutComplete(): failed inode.Wrote() for dirEntryInodeNumber 0x%016X", dirEntryInodeNumber) 2864 return 2865 } 2866 2867 fileOffset += pObjectLengths[pObjectIndex] 2868 } 2869 2870 // Apply pObjectMetadata to FileInode (this will flush it as well) 2871 2872 err = inodeVolumeHandle.PutStream(dirEntryInodeNumber, MiddlewareStream, pObjectMetadata) 2873 if err != nil { 2874 heldLocks.free() 2875 logger.DebugfIDWithError(internalDebug, err, "MiddlewarePutComplete(): failed PutStream() for dirEntryInodeNumber 0x%016X (pObjectMetadata: %v)", dirEntryInodeNumber, pObjectMetadata) 2876 return 2877 } 2878 2879 stat, err = vS.getstatHelperWhileLocked(dirEntryInodeNumber) 2880 if nil != err { 2881 heldLocks.free() 2882 return 2883 } 2884 2885 mtime = stat[StatMTime] 2886 ctime = stat[StatCTime] 2887 fileInodeNumber = dirEntryInodeNumber 2888 numWrites = stat[StatNumWrites] 2889 2890 heldLocks.free() 2891 return 2892 } 2893 2894 func (vS *volumeStruct) MiddlewareMkdir(vContainerName string, vObjectPath string, metadata []byte) (mtime uint64, ctime uint64, inodeNumber inode.InodeNumber, numWrites uint64, err error) { 2895 var ( 2896 dirInodeNumber inode.InodeNumber 2897 dirEntryInodeNumber inode.InodeNumber 2898 dirEntryBasename string 2899 dirEntryInodeType inode.InodeType 2900 heldLocks *heldLocksStruct 2901 retryRequired bool 2902 stat Stat 2903 tryLockBackoffContext *tryLockBackoffContextStruct 2904 ) 2905 2906 startTime := time.Now() 2907 defer func() { 2908 globals.MiddlewareMkdirUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 2909 if err != nil { 2910 
globals.MiddlewareMkdirErrors.Add(1) 2911 } 2912 }() 2913 2914 // Retry until done or failure (starting with ZERO backoff) 2915 2916 tryLockBackoffContext = &tryLockBackoffContextStruct{} 2917 2918 Restart: 2919 2920 // Perform backoff and update for each restart (starting with ZERO backoff of course) 2921 2922 tryLockBackoffContext.backoff() 2923 2924 // Construct fresh heldLocks for this restart 2925 2926 heldLocks = newHeldLocks() 2927 2928 // Resolve the object, locking it and its parent directory exclusive 2929 dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, dirEntryInodeType, retryRequired, err = 2930 vS.resolvePath( 2931 inode.RootDirInodeNumber, 2932 vContainerName+"/"+vObjectPath, 2933 heldLocks, 2934 resolvePathFollowDirSymlinks| 2935 resolvePathCreateMissingPathElements| 2936 resolvePathRequireExclusiveLockOnDirInode| 2937 resolvePathRequireExclusiveLockOnDirEntryInode) 2938 if nil != err { 2939 heldLocks.free() 2940 return 2941 } 2942 if retryRequired { 2943 heldLocks.free() 2944 goto Restart 2945 } 2946 2947 // The semantics of PUT for a directory object require that an existing 2948 // file or symlink be discarded and be replaced with a directory (an 2949 // existing directory is fine; it just has its headers overwritten). 
2950 if dirEntryInodeType != inode.DirType { 2951 2952 // unlink the file or symlink (unlink flushes the inodes) 2953 err = vS.unlinkActual(dirInodeNumber, dirEntryBasename, dirEntryInodeNumber) 2954 if err != nil { 2955 2956 // ReadOnlyError is my best guess for the failure 2957 err = blunder.NewError(blunder.ReadOnlyError, 2958 "MiddlewareMkdir(): vol '%s' failed to unlink '%s': %v", 2959 vS.volumeName, vContainerName+"/"+vObjectPath, err) 2960 heldLocks.free() 2961 return 2962 } 2963 2964 // let resolvePath() make the directory 2965 dirInodeNumber, dirEntryInodeNumber, dirEntryBasename, dirEntryInodeType, retryRequired, err = 2966 vS.resolvePath( 2967 inode.RootDirInodeNumber, 2968 vContainerName+"/"+vObjectPath, 2969 heldLocks, 2970 resolvePathFollowDirSymlinks| 2971 resolvePathCreateMissingPathElements| 2972 resolvePathDirEntryInodeMustBeDirectory| 2973 resolvePathRequireExclusiveLockOnDirInode| 2974 resolvePathRequireExclusiveLockOnDirEntryInode) 2975 if nil != err { 2976 heldLocks.free() 2977 return 2978 } 2979 if retryRequired { 2980 heldLocks.free() 2981 goto Restart 2982 } 2983 } 2984 2985 err = vS.inodeVolumeHandle.PutStream(dirEntryInodeNumber, MiddlewareStream, metadata) 2986 if err != nil { 2987 heldLocks.free() 2988 logger.DebugfIDWithError(internalDebug, err, "MiddlewareHeadResponse(): failed PutStream() for for dirEntryInodeNumber 0x%016X (pObjectMetadata: %v)", dirEntryInodeNumber, metadata) 2989 return 2990 } 2991 2992 stat, err = vS.getstatHelperWhileLocked(dirEntryInodeNumber) 2993 if nil != err { 2994 heldLocks.free() 2995 return 2996 } 2997 2998 mtime = stat[StatMTime] 2999 ctime = stat[StatCTime] 3000 inodeNumber = dirEntryInodeNumber 3001 numWrites = stat[StatNumWrites] 3002 3003 heldLocks.free() 3004 return 3005 } 3006 3007 func (vS *volumeStruct) MiddlewarePutContainer(containerName string, oldMetadata []byte, newMetadata []byte) (err error) { 3008 var ( 3009 containerInodeLock *dlm.RWLockStruct 3010 containerInodeNumber inode.InodeNumber 
3011 existingMetadata []byte 3012 newDirInodeLock *dlm.RWLockStruct 3013 newDirInodeNumber inode.InodeNumber 3014 ) 3015 3016 startTime := time.Now() 3017 defer func() { 3018 globals.MiddlewarePutContainerUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 3019 globals.MiddlewarePutContainerBytes.Add(uint64(len(newMetadata))) 3020 if err != nil { 3021 globals.MiddlewarePutContainerErrors.Add(1) 3022 } 3023 }() 3024 3025 vS.jobRWMutex.RLock() 3026 defer vS.jobRWMutex.RUnlock() 3027 3028 // Yes, it's a heavy lock to hold on the root inode. However, we 3029 // might need to add a new directory entry there, so there's not 3030 // much else we can do. 3031 rootInodeLock, err := vS.inodeVolumeHandle.GetWriteLock(inode.RootDirInodeNumber, nil) 3032 if nil != err { 3033 return 3034 } 3035 defer rootInodeLock.Unlock() 3036 3037 containerInodeNumber, err = vS.inodeVolumeHandle.Lookup(inode.RootDirInodeNumber, containerName) 3038 if err != nil && blunder.IsNot(err, blunder.NotFoundError) { 3039 return 3040 } else if err != nil { 3041 // No such container, so we create it 3042 err = validateBaseName(containerName) 3043 if err != nil { 3044 return 3045 } 3046 3047 newDirInodeNumber, err = vS.inodeVolumeHandle.CreateDir(inode.PosixModePerm, 0, 0) 3048 if err != nil { 3049 logger.ErrorWithError(err) 3050 return 3051 } 3052 3053 newDirInodeLock, err = vS.inodeVolumeHandle.GetWriteLock(newDirInodeNumber, nil) 3054 defer newDirInodeLock.Unlock() 3055 3056 err = vS.inodeVolumeHandle.PutStream(newDirInodeNumber, MiddlewareStream, newMetadata) 3057 if err != nil { 3058 logger.ErrorWithError(err) 3059 return 3060 } 3061 3062 err = vS.inodeVolumeHandle.Link(inode.RootDirInodeNumber, containerName, newDirInodeNumber, false) 3063 3064 return 3065 } 3066 3067 containerInodeLock, err = vS.inodeVolumeHandle.GetWriteLock(containerInodeNumber, nil) 3068 if err != nil { 3069 return 3070 } 3071 defer containerInodeLock.Unlock() 3072 3073 // Existing container: just update the metadata 3074 
existingMetadata, err = vS.inodeVolumeHandle.GetStream(containerInodeNumber, MiddlewareStream) 3075 3076 // GetStream() will return an error if there is no "middleware" stream 3077 if err != nil && blunder.IsNot(err, blunder.StreamNotFound) { 3078 return 3079 } else if err != nil { 3080 existingMetadata = []byte{} 3081 } 3082 3083 // Only change it if the caller sent the current value 3084 if !bytes.Equal(existingMetadata, oldMetadata) { 3085 err = blunder.NewError(blunder.TryAgainError, "Metadata differs - actual: %v request: %v", existingMetadata, oldMetadata) 3086 return 3087 } 3088 err = vS.inodeVolumeHandle.PutStream(containerInodeNumber, MiddlewareStream, newMetadata) 3089 3090 return 3091 } 3092 3093 func (vS *volumeStruct) Mkdir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string, filePerm inode.InodeMode) (newDirInodeNumber inode.InodeNumber, err error) { 3094 startTime := time.Now() 3095 defer func() { 3096 globals.MkdirUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 3097 if err != nil { 3098 globals.MkdirErrors.Add(1) 3099 } 3100 }() 3101 3102 vS.jobRWMutex.RLock() 3103 defer vS.jobRWMutex.RUnlock() 3104 3105 // Make sure the file basename is not too long 3106 err = validateBaseName(basename) 3107 if err != nil { 3108 return 0, err 3109 } 3110 3111 newDirInodeNumber, err = vS.inodeVolumeHandle.CreateDir(filePerm, userID, groupID) 3112 if err != nil { 3113 logger.ErrorWithError(err) 3114 return 0, err 3115 } 3116 3117 inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil) 3118 if err != nil { 3119 return 3120 } 3121 err = inodeLock.WriteLock() 3122 if err != nil { 3123 return 3124 } 3125 defer inodeLock.Unlock() 3126 3127 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, 3128 inode.NoOverride) { 3129 3130 destroyErr := vS.inodeVolumeHandle.Destroy(newDirInodeNumber) 3131 if destroyErr != nil { 3132 
logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Access(F_OK) in fs.Mkdir", newDirInodeNumber) 3133 } 3134 err = blunder.NewError(blunder.NotFoundError, "ENOENT") 3135 return 0, err 3136 } 3137 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK, 3138 inode.NoOverride) { 3139 3140 destroyErr := vS.inodeVolumeHandle.Destroy(newDirInodeNumber) 3141 if destroyErr != nil { 3142 logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Access(W_OK|X_OK) in fs.Mkdir", newDirInodeNumber) 3143 } 3144 err = blunder.NewError(blunder.PermDeniedError, "EACCES") 3145 return 0, err 3146 } 3147 3148 err = vS.inodeVolumeHandle.Link(inodeNumber, basename, newDirInodeNumber, false) 3149 if err != nil { 3150 destroyErr := vS.inodeVolumeHandle.Destroy(newDirInodeNumber) 3151 if destroyErr != nil { 3152 logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Link() in fs.Mkdir", newDirInodeNumber) 3153 } 3154 return 0, err 3155 } 3156 3157 return newDirInodeNumber, nil 3158 } 3159 3160 func (vS *volumeStruct) RemoveXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, streamName string) (err error) { 3161 startTime := time.Now() 3162 defer func() { 3163 globals.RemoveXAttrUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 3164 if err != nil { 3165 globals.RemoveXAttrErrors.Add(1) 3166 } 3167 }() 3168 3169 vS.jobRWMutex.RLock() 3170 defer vS.jobRWMutex.RUnlock() 3171 3172 inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil) 3173 if err != nil { 3174 return 3175 } 3176 err = inodeLock.WriteLock() 3177 if err != nil { 3178 return 3179 } 3180 defer inodeLock.Unlock() 3181 3182 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, 3183 inode.NoOverride) { 3184 err = blunder.NewError(blunder.NotFoundError, "ENOENT") 3185 return 3186 } 3187 if 
// workerForMoveAndRename implements the validation, lock acquisition, and
// permission checking shared by Rename() and Move(), then performs the move
// via inode.Move().
//
// On success, heldLocks is returned still held (the caller must free() it)
// and toDestroyInodeNumber is non-zero if the destination entry was
// displaced (that inode is for the caller to destroy). On failure,
// heldLocks is returned nil (already freed here). retryRequired results
// from resolvePath() trigger a full restart with backoff.
func (vS *volumeStruct) workerForMoveAndRename(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, srcDirInodeNumber inode.InodeNumber, srcBasename string, dstDirInodeNumber inode.InodeNumber, dstBasename string) (toDestroyInodeNumber inode.InodeNumber, heldLocks *heldLocksStruct, err error) {
	var (
		dirEntryBasename      string
		dirEntryInodeNumber   inode.InodeNumber
		dirInodeNumber        inode.InodeNumber
		retryRequired         bool
		tryLockBackoffContext *tryLockBackoffContextStruct
	)

	// Both names must be valid before any locking is attempted

	err = validateBaseName(srcBasename)
	if nil != err {
		heldLocks = nil
		return
	}

	err = validateBaseName(dstBasename)
	if nil != err {
		heldLocks = nil
		return
	}

	// Retry until done or failure (starting with ZERO backoff)

	tryLockBackoffContext = &tryLockBackoffContextStruct{}

Restart:

	// Perform backoff and update for each restart (starting with ZERO backoff of course)

	tryLockBackoffContext.backoff()

	// Construct fresh heldLocks for this restart

	heldLocks = newHeldLocks()

	// Acquire WriteLock on {srcDirInodeNumber,srcBasename} & perform Access Check

	dirInodeNumber, _, dirEntryBasename, _, retryRequired, err =
		vS.resolvePath(
			srcDirInodeNumber,
			srcBasename,
			heldLocks,
			resolvePathRequireExclusiveLockOnDirEntryInode|
				resolvePathRequireExclusiveLockOnDirInode)

	if nil != err {
		heldLocks.free()
		heldLocks = nil
		err = blunder.AddError(err, blunder.NotFoundError)
		return
	}

	if retryRequired {
		heldLocks.free()
		goto Restart
	}

	// resolvePath() must have landed exactly on the requested src entry
	if (dirInodeNumber != srcDirInodeNumber) || (dirEntryBasename != srcBasename) {
		heldLocks.free()
		heldLocks = nil
		err = blunder.NewError(blunder.InvalidArgError, "EINVAL")
		return
	}

	if !vS.inodeVolumeHandle.Access(srcDirInodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK, inode.NoOverride) {
		heldLocks.free()
		heldLocks = nil
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	// Acquire WriteLock on dstDirInodeNumber & perform Access Check

	_, dirEntryInodeNumber, _, _, retryRequired, err =
		vS.resolvePath(
			dstDirInodeNumber,
			".",
			heldLocks,
			resolvePathDirEntryInodeMustBeDirectory|
				resolvePathRequireExclusiveLockOnDirEntryInode)

	if nil != err {
		heldLocks.free()
		heldLocks = nil
		err = blunder.AddError(err, blunder.NotFoundError)
		return
	}

	if retryRequired {
		heldLocks.free()
		goto Restart
	}

	// Resolving "." must have returned the destination directory itself
	if dirEntryInodeNumber != dstDirInodeNumber {
		heldLocks.free()
		heldLocks = nil
		err = blunder.NewError(blunder.InvalidArgError, "EINVAL")
		return
	}

	if !vS.inodeVolumeHandle.Access(dstDirInodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK, inode.NoOverride) {
		heldLocks.free()
		heldLocks = nil
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	// Acquire WriteLock on dstBasename if it exists

	dirInodeNumber, _, dirEntryBasename, _, retryRequired, err =
		vS.resolvePath(
			dstDirInodeNumber,
			dstBasename,
			heldLocks,
			resolvePathRequireExclusiveLockOnDirEntryInode)

	if nil == err {
		if retryRequired {
			heldLocks.free()
			goto Restart
		}

		if (dirInodeNumber != dstDirInodeNumber) || (dirEntryBasename != dstBasename) {
			heldLocks.free()
			heldLocks = nil
			err = blunder.NewError(blunder.InvalidArgError, "EINVAL")
			return
		}
	} else {
		// This is actually OK... it means the target path of the Rename() isn't being potentially replaced
	}

	// Locks held & Access Checks succeeded... time to do the Move

	toDestroyInodeNumber, err = vS.inodeVolumeHandle.Move(srcDirInodeNumber, srcBasename, dstDirInodeNumber, dstBasename)

	return // err returned from inode.Move() suffices here
}
3324 3325 if (dirInodeNumber != dstDirInodeNumber) || (dirEntryBasename != dstBasename) { 3326 heldLocks.free() 3327 heldLocks = nil 3328 err = blunder.NewError(blunder.InvalidArgError, "EINVAL") 3329 return 3330 } 3331 } else { 3332 // This is actually OK... it means the target path of the Rename() isn't being potentially replaced 3333 } 3334 3335 // Locks held & Access Checks succeeded... time to do the Move 3336 3337 toDestroyInodeNumber, err = vS.inodeVolumeHandle.Move(srcDirInodeNumber, srcBasename, dstDirInodeNumber, dstBasename) 3338 3339 return // err returned from inode.Move() suffices here 3340 } 3341 3342 func (vS *volumeStruct) Rename(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, srcDirInodeNumber inode.InodeNumber, srcBasename string, dstDirInodeNumber inode.InodeNumber, dstBasename string) (err error) { 3343 var ( 3344 destroyErr error 3345 heldLocks *heldLocksStruct 3346 toDestroyInodeNumber inode.InodeNumber 3347 ) 3348 3349 startTime := time.Now() 3350 defer func() { 3351 globals.RenameUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 3352 if err != nil { 3353 globals.RenameErrors.Add(1) 3354 } 3355 }() 3356 3357 vS.jobRWMutex.RLock() 3358 defer vS.jobRWMutex.RUnlock() 3359 3360 toDestroyInodeNumber, heldLocks, err = vS.workerForMoveAndRename(userID, groupID, otherGroupIDs, srcDirInodeNumber, srcBasename, dstDirInodeNumber, dstBasename) 3361 3362 if (nil == err) && (inode.InodeNumber(0) != toDestroyInodeNumber) { 3363 destroyErr = vS.inodeVolumeHandle.Destroy(toDestroyInodeNumber) 3364 if nil != destroyErr { 3365 logger.ErrorWithError(destroyErr) 3366 } 3367 } 3368 3369 if nil != heldLocks { 3370 heldLocks.free() 3371 } 3372 3373 return 3374 } 3375 3376 func (vS *volumeStruct) Move(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, srcDirInodeNumber inode.InodeNumber, srcBasename string, dstDirInodeNumber inode.InodeNumber, dstBasename string) 
(toDestroyInodeNumber inode.InodeNumber, err error) { 3377 var ( 3378 heldLocks *heldLocksStruct 3379 ) 3380 3381 startTime := time.Now() 3382 defer func() { 3383 globals.MoveUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 3384 if err != nil { 3385 globals.MoveErrors.Add(1) 3386 } 3387 }() 3388 3389 vS.jobRWMutex.RLock() 3390 defer vS.jobRWMutex.RUnlock() 3391 3392 toDestroyInodeNumber, heldLocks, err = vS.workerForMoveAndRename(userID, groupID, otherGroupIDs, srcDirInodeNumber, srcBasename, dstDirInodeNumber, dstBasename) 3393 3394 if nil != heldLocks { 3395 heldLocks.free() 3396 } 3397 3398 return 3399 } 3400 3401 func (vS *volumeStruct) Destroy(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (err error) { 3402 var ( 3403 inodeLock *dlm.RWLockStruct 3404 ) 3405 3406 startTime := time.Now() 3407 defer func() { 3408 globals.DestroyUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 3409 if err != nil { 3410 globals.DestroyErrors.Add(1) 3411 } 3412 }() 3413 3414 vS.jobRWMutex.RLock() 3415 3416 inodeLock, err = vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil) 3417 if nil != err { 3418 vS.jobRWMutex.RUnlock() 3419 return 3420 } 3421 err = inodeLock.WriteLock() 3422 if nil != err { 3423 vS.jobRWMutex.RUnlock() 3424 return 3425 } 3426 3427 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, 3428 inode.NoOverride) { 3429 _ = inodeLock.Unlock() 3430 vS.jobRWMutex.RUnlock() 3431 err = blunder.NewError(blunder.NotFoundError, "ENOENT") 3432 return 3433 } 3434 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK, 3435 inode.OwnerOverride) { 3436 _ = inodeLock.Unlock() 3437 vS.jobRWMutex.RUnlock() 3438 err = blunder.NewError(blunder.PermDeniedError, "EACCES") 3439 return 3440 } 3441 3442 err = vS.inodeVolumeHandle.Destroy(inodeNumber) 3443 3444 _ = inodeLock.Unlock() 3445 vS.jobRWMutex.RUnlock() 3446 3447 return 
3448 } 3449 3450 func (vS *volumeStruct) Read(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, offset uint64, length uint64, profiler *utils.Profiler) (buf []byte, err error) { 3451 startTime := time.Now() 3452 defer func() { 3453 globals.ReadUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 3454 globals.ReadBytes.Add(uint64(len(buf))) 3455 if err != nil { 3456 globals.ReadErrors.Add(1) 3457 } 3458 }() 3459 3460 vS.jobRWMutex.RLock() 3461 defer vS.jobRWMutex.RUnlock() 3462 3463 inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil) 3464 if err != nil { 3465 return 3466 } 3467 err = inodeLock.ReadLock() 3468 if err != nil { 3469 return 3470 } 3471 defer inodeLock.Unlock() 3472 3473 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, 3474 inode.NoOverride) { 3475 err = blunder.NewError(blunder.NotFoundError, "ENOENT") 3476 return 3477 } 3478 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK, 3479 inode.OwnerOverride) { 3480 err = blunder.NewError(blunder.PermDeniedError, "EACCES") 3481 return 3482 } 3483 3484 inodeType, err := vS.inodeVolumeHandle.GetType(inodeNumber) 3485 if err != nil { 3486 logger.ErrorfWithError(err, "couldn't get type for inode %v", inodeNumber) 3487 return buf, err 3488 } 3489 // Make sure the inode number is for a file inode 3490 if inodeType != inode.FileType { 3491 err = fmt.Errorf("%s: expected inode %v to be a file inode, got %v", utils.GetFnName(), inodeNumber, inodeType) 3492 logger.ErrorWithError(err) 3493 return buf, blunder.AddError(err, blunder.NotFileError) 3494 } 3495 3496 profiler.AddEventNow("before inode.Read()") 3497 buf, err = vS.inodeVolumeHandle.Read(inodeNumber, offset, length, profiler) 3498 profiler.AddEventNow("after inode.Read()") 3499 if uint64(len(buf)) > length { 3500 err = fmt.Errorf("%s: Buf length %v is greater than supplied length %v", 
// readdirHelper reads up to maxEntries directory entries from inodeNumber
// (resuming after prevReturned, if given) and then gathers a Stat for each
// returned entry, filling in each DirEntry's Type from the Stat's StatFType.
// Shared by Readdir() and ReaddirPlus().
//
// All locks are acquired via AttemptReadLock(); any lock failure — on the
// directory itself or on any entry during the stat pass — triggers a full
// restart (with backoff), which re-reads the directory from scratch.
func (vS *volumeStruct) readdirHelper(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, maxEntries uint64, prevReturned ...interface{}) (dirEntries []inode.DirEntry, statEntries []Stat, numEntries uint64, areMoreEntries bool, err error) {
	var (
		dirEntryIndex         uint64
		dlmCallerID           dlm.CallerID
		inodeLock             *dlm.RWLockStruct
		inodeVolumeHandle     inode.VolumeHandle
		internalErr           error
		tryLockBackoffContext *tryLockBackoffContextStruct
	)

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	dlmCallerID = dlm.GenerateCallerID()
	inodeVolumeHandle = vS.inodeVolumeHandle

	tryLockBackoffContext = &tryLockBackoffContextStruct{}

Restart:

	// Starts with ZERO backoff; grows on each subsequent restart
	tryLockBackoffContext.backoff()

	inodeLock, err = inodeVolumeHandle.AttemptReadLock(inodeNumber, dlmCallerID)
	if nil != err {
		goto Restart
	}

	if !inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, inode.NoOverride) {
		internalErr = inodeLock.Unlock()
		if nil != internalErr {
			logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr)
		}
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK, inode.OwnerOverride) {
		internalErr = inodeLock.Unlock()
		if nil != internalErr {
			logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr)
		}
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	dirEntries, areMoreEntries, err = inodeVolumeHandle.ReadDir(inodeNumber, maxEntries, 0, prevReturned...)

	// Directory lock is released before the per-entry stat pass below
	internalErr = inodeLock.Unlock()
	if nil != internalErr {
		logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr)
	}

	if nil != err {
		return
	}

	// Now go back and fill in (dirEntries.Type and) statEntries

	numEntries = uint64(len(dirEntries))

	statEntries = make([]Stat, numEntries, numEntries)

	for dirEntryIndex = 0; dirEntryIndex < numEntries; dirEntryIndex++ {
		// NOTE: a failed lock attempt here restarts the whole helper,
		// discarding the dirEntries/statEntries gathered so far
		inodeLock, err = inodeVolumeHandle.AttemptReadLock(dirEntries[dirEntryIndex].InodeNumber, dlmCallerID)
		if nil != err {
			goto Restart
		}

		statEntries[dirEntryIndex], err = vS.getstatHelperWhileLocked(dirEntries[dirEntryIndex].InodeNumber)
		if nil != err {
			internalErr = inodeLock.Unlock()
			if nil != internalErr {
				logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr)
			}
			return
		}

		dirEntries[dirEntryIndex].Type = inode.InodeType(statEntries[dirEntryIndex][StatFType])

		internalErr = inodeLock.Unlock()
		if nil != internalErr {
			logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr)
		}
	}

	return
}
3553 3554 internalErr = inodeLock.Unlock() 3555 if nil != internalErr { 3556 logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr) 3557 } 3558 3559 if nil != err { 3560 return 3561 } 3562 3563 // Now go back and fill in (dirEntries.Type and) statEntries 3564 3565 numEntries = uint64(len(dirEntries)) 3566 3567 statEntries = make([]Stat, numEntries, numEntries) 3568 3569 for dirEntryIndex = 0; dirEntryIndex < numEntries; dirEntryIndex++ { 3570 inodeLock, err = inodeVolumeHandle.AttemptReadLock(dirEntries[dirEntryIndex].InodeNumber, dlmCallerID) 3571 if nil != err { 3572 goto Restart 3573 } 3574 3575 statEntries[dirEntryIndex], err = vS.getstatHelperWhileLocked(dirEntries[dirEntryIndex].InodeNumber) 3576 if nil != err { 3577 internalErr = inodeLock.Unlock() 3578 if nil != internalErr { 3579 logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr) 3580 } 3581 return 3582 } 3583 3584 dirEntries[dirEntryIndex].Type = inode.InodeType(statEntries[dirEntryIndex][StatFType]) 3585 3586 internalErr = inodeLock.Unlock() 3587 if nil != internalErr { 3588 logger.Fatalf("Failure unlocking a held LockID %s: %v", inodeLock.LockID, internalErr) 3589 } 3590 } 3591 3592 return 3593 } 3594 3595 func (vS *volumeStruct) Readdir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, maxEntries uint64, prevReturned ...interface{}) (entries []inode.DirEntry, numEntries uint64, areMoreEntries bool, err error) { 3596 startTime := time.Now() 3597 defer func() { 3598 globals.ReaddirUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 3599 globals.ReaddirEntries.Add(uint64(len(entries))) 3600 if err != nil { 3601 globals.ReaddirErrors.Add(1) 3602 } 3603 }() 3604 3605 entries, _, numEntries, areMoreEntries, err = vS.readdirHelper(userID, groupID, otherGroupIDs, inodeNumber, maxEntries, prevReturned...) 
3606 3607 return 3608 } 3609 3610 func (vS *volumeStruct) ReaddirPlus(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, maxEntries uint64, prevReturned ...interface{}) (dirEntries []inode.DirEntry, statEntries []Stat, numEntries uint64, areMoreEntries bool, err error) { 3611 startTime := time.Now() 3612 defer func() { 3613 globals.ReaddirPlusUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 3614 globals.ReaddirPlusBytes.Add(uint64(len(dirEntries))) 3615 if err != nil { 3616 globals.ReaddirPlusErrors.Add(1) 3617 } 3618 }() 3619 3620 dirEntries, statEntries, numEntries, areMoreEntries, err = vS.readdirHelper(userID, groupID, otherGroupIDs, inodeNumber, maxEntries, prevReturned...) 3621 3622 return 3623 } 3624 3625 func (vS *volumeStruct) Readsymlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber) (target string, err error) { 3626 startTime := time.Now() 3627 defer func() { 3628 globals.ReadsymlinkUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 3629 if err != nil { 3630 globals.ReadsymlinkErrors.Add(1) 3631 } 3632 }() 3633 3634 vS.jobRWMutex.RLock() 3635 defer vS.jobRWMutex.RUnlock() 3636 3637 inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil) 3638 if err != nil { 3639 return 3640 } 3641 err = inodeLock.ReadLock() 3642 if err != nil { 3643 return 3644 } 3645 defer inodeLock.Unlock() 3646 3647 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, 3648 inode.NoOverride) { 3649 3650 err = blunder.NewError(blunder.NotFoundError, "ENOENT") 3651 return 3652 } 3653 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.R_OK, 3654 inode.NoOverride) { 3655 3656 err = blunder.NewError(blunder.PermDeniedError, "EACCES") 3657 return 3658 } 3659 3660 target, err = vS.inodeVolumeHandle.GetSymlink(inodeNumber) 3661 3662 return target, err 
3663 } 3664 3665 func (vS *volumeStruct) Resize(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, newSize uint64) (err error) { 3666 startTime := time.Now() 3667 defer func() { 3668 globals.ResizeUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 3669 if err != nil { 3670 globals.ResizeErrors.Add(1) 3671 } 3672 }() 3673 3674 vS.jobRWMutex.RLock() 3675 defer vS.jobRWMutex.RUnlock() 3676 3677 inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil) 3678 if err != nil { 3679 return 3680 } 3681 err = inodeLock.WriteLock() 3682 if err != nil { 3683 return 3684 } 3685 defer inodeLock.Unlock() 3686 3687 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, 3688 inode.NoOverride) { 3689 3690 err = blunder.NewError(blunder.NotFoundError, "ENOENT") 3691 return 3692 } 3693 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK, 3694 inode.OwnerOverride) { 3695 3696 err = blunder.NewError(blunder.PermDeniedError, "EACCES") 3697 return 3698 } 3699 3700 err = vS.inodeVolumeHandle.SetSize(inodeNumber, newSize) 3701 vS.untrackInFlightFileInodeData(inodeNumber, false) 3702 3703 return err 3704 } 3705 3706 func (vS *volumeStruct) Rmdir(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string) (err error) { 3707 startTime := time.Now() 3708 defer func() { 3709 globals.RmdirUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 3710 if err != nil { 3711 globals.RmdirErrors.Add(1) 3712 } 3713 }() 3714 3715 vS.jobRWMutex.RLock() 3716 defer vS.jobRWMutex.RUnlock() 3717 3718 callerID := dlm.GenerateCallerID() 3719 inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, callerID) 3720 if err != nil { 3721 return 3722 } 3723 err = inodeLock.WriteLock() 3724 if err != nil { 3725 return 3726 } 3727 defer inodeLock.Unlock() 3728 3729 if 
!vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, 3730 inode.NoOverride) { 3731 err = blunder.NewError(blunder.NotFoundError, "ENOENT") 3732 return 3733 } 3734 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK, 3735 inode.NoOverride) { 3736 err = blunder.NewError(blunder.PermDeniedError, "EACCES") 3737 return 3738 } 3739 3740 basenameInodeNumber, err := vS.inodeVolumeHandle.Lookup(inodeNumber, basename) 3741 if nil != err { 3742 return 3743 } 3744 3745 basenameInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(basenameInodeNumber, callerID) 3746 if err != nil { 3747 return 3748 } 3749 err = basenameInodeLock.WriteLock() 3750 if err != nil { 3751 return 3752 } 3753 defer basenameInodeLock.Unlock() 3754 3755 // no permissions are required on the target directory 3756 3757 err = vS.rmdirActual(inodeNumber, basename, basenameInodeNumber) 3758 return 3759 } 3760 3761 func (vS *volumeStruct) rmdirActual(inodeNumber inode.InodeNumber, basename string, basenameInodeNumber inode.InodeNumber) (err error) { 3762 var ( 3763 basenameInodeType inode.InodeType 3764 dirEntries uint64 3765 toDestroyInodeNumber inode.InodeNumber 3766 ) 3767 3768 basenameInodeType, err = vS.inodeVolumeHandle.GetType(basenameInodeNumber) 3769 if nil != err { 3770 return 3771 } 3772 3773 if inode.DirType != basenameInodeType { 3774 err = fmt.Errorf("Rmdir() called on non-Directory") 3775 err = blunder.AddError(err, blunder.NotDirError) 3776 return 3777 } 3778 3779 dirEntries, err = vS.inodeVolumeHandle.NumDirEntries(basenameInodeNumber) 3780 if nil != err { 3781 return 3782 } 3783 3784 if 2 != dirEntries { 3785 err = fmt.Errorf("Directory not empty") 3786 err = blunder.AddError(err, blunder.NotEmptyError) 3787 return 3788 } 3789 3790 toDestroyInodeNumber, err = vS.inodeVolumeHandle.Unlink(inodeNumber, basename, false) 3791 if nil != err { 3792 return 3793 } 3794 3795 if inode.InodeNumber(0) != toDestroyInodeNumber { 
// Setstat applies the attributes present in the stat map to the given
// inode. All permission and range validation is performed up front, before
// any mutation, so a rejected request changes nothing.
//
// Recognized keys: StatSize, StatMode, StatUserID, StatGroupID, StatCRTime,
// StatMTime, StatATime. StatCTime is explicitly ignored (logged only) —
// ctime must reflect actual content changes and may not be set by clients.
func (vS *volumeStruct) Setstat(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, stat Stat) (err error) {
	startTime := time.Now()
	defer func() {
		globals.SetstatUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.SetstatErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.WriteLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	// P_OK gate on any Setstat at all (presumably "is owner or root" —
	// confirm against inode.Access semantics)
	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.P_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.NotPermError, "EPERM")
		return
	}

	// perform all permissions checks before making any changes
	//
	// changing the filesize requires write permission
	_, ok := stat[StatSize]
	if ok {
		if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
			inode.OwnerOverride) {
			err = blunder.NewError(blunder.NotPermError, "EPERM")
			return
		}
	}

	// most other attributes can only be changed by the owner of the file
	ownerOnly := []StatKey{StatCTime, StatCRTime, StatMTime, StatATime, StatMode, StatUserID, StatGroupID}
	for _, key := range ownerOnly {
		_, ok := stat[key]
		if ok {
			if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.P_OK,
				inode.NoOverride) {
				err = blunder.NewError(blunder.NotPermError, "EPERM")
				return
			}
			// one failed/passed check covers all owner-only keys
			break
		}
	}

	// the superuser (root) is the only one that can change the owner of the file to a
	// different user, but the owner of the file can perform a no-op "change" in
	// ownership
	newUserID, settingUserID := stat[StatUserID]
	if settingUserID && userID != inode.InodeRootUserID {
		if userID != inode.InodeUserID(newUserID) {
			err = blunder.NewError(blunder.NotPermError, "EPERM")
			return
		}
	}

	// the group can only be changed to the current group or another group the owner
	// is in (unless its the superuser asking)
	newGroupID, settingGroupID := stat[StatGroupID]
	if settingGroupID && groupID != inode.InodeGroupID(newGroupID) && userID != inode.InodeRootUserID {

		// assume failure; clear err if newGroupID is in otherGroupIDs
		err = blunder.NewError(blunder.NotPermError, "EPERM")
		for _, otherID := range otherGroupIDs {
			if inode.InodeGroupID(newGroupID) == otherID {
				err = nil
				break
			}
		}
		if err != nil {
			return
		}
	}

	// sanity checks for invalid/illegal values
	if settingUserID {
		// Since we are using a uint64 to convey a uint32 value, make sure we didn't get something too big
		if newUserID > uint64(math.MaxUint32) {
			err = fmt.Errorf("%s: userID is too large - value is %v, max is %v.", utils.GetFnName(), newUserID, uint64(math.MaxUint32))
			err = blunder.AddError(err, blunder.InvalidUserIDError)
			return
		}
	}

	if settingGroupID {
		// Since we are using a uint64 to convey a uint32 value, make sure we didn't get something too big
		if newGroupID > uint64(math.MaxUint32) {
			err = fmt.Errorf("%s: groupID is too large - value is %v, max is %v.", utils.GetFnName(), newGroupID, uint64(math.MaxUint32))
			err = blunder.AddError(err, blunder.InvalidGroupIDError)
			return
		}
	}

	filePerm, settingFilePerm := stat[StatMode]
	if settingFilePerm {
		// Since we are using a uint64 to convey a 12 bit value, make sure we didn't get something too big
		if filePerm >= 1<<12 {
			err = fmt.Errorf("%s: filePerm is too large - value is %v, max is %v.",
				utils.GetFnName(),
				filePerm, 1<<12)
			err = blunder.AddError(err, blunder.InvalidFileModeError)
			return
		}
	}

	// get to work setting things
	//
	// Set permissions, if present in the map
	if settingFilePerm {
		err = vS.inodeVolumeHandle.SetPermMode(inodeNumber, inode.InodeMode(filePerm))
		if err != nil {
			logger.ErrorWithError(err)
			return err
		}
	}

	// set owner and/or group owner, if present in the map
	err = nil
	if settingUserID && settingGroupID {
		err = vS.inodeVolumeHandle.SetOwnerUserIDGroupID(inodeNumber, inode.InodeUserID(newUserID),
			inode.InodeGroupID(newGroupID))
	} else if settingUserID {
		err = vS.inodeVolumeHandle.SetOwnerUserID(inodeNumber, inode.InodeUserID(newUserID))
	} else if settingGroupID {
		err = vS.inodeVolumeHandle.SetOwnerGroupID(inodeNumber, inode.InodeGroupID(newGroupID))
	}
	if err != nil {
		logger.ErrorWithError(err)
		return
	}

	// Set crtime, if present in the map
	crtime, ok := stat[StatCRTime]
	if ok {
		newCreationTime := time.Unix(0, int64(crtime))
		err = vS.inodeVolumeHandle.SetCreationTime(inodeNumber, newCreationTime)
		if err != nil {
			logger.ErrorWithError(err)
			return err
		}
	}

	// Set mtime, if present in the map
	mtime, ok := stat[StatMTime]
	if ok {
		newModificationTime := time.Unix(0, int64(mtime))
		err = vS.inodeVolumeHandle.SetModificationTime(inodeNumber, newModificationTime)
		if err != nil {
			logger.ErrorWithError(err)
			return err
		}
	}

	// Set atime, if present in the map
	atime, ok := stat[StatATime]
	if ok {
		newAccessTime := time.Unix(0, int64(atime))
		err = vS.inodeVolumeHandle.SetAccessTime(inodeNumber, newAccessTime)
		if err != nil {
			logger.ErrorWithError(err)
			return err
		}
	}

	// ctime is used to reliably determine whether the contents of a file
	// have changed so it cannot be altered by a client (some security
	// software depends on this)
	ctime, ok := stat[StatCTime]
	if ok {
		newAccessTime := time.Unix(0, int64(ctime))
		logger.Infof("%s: ignoring attempt to change ctime to %v on volume '%s' inode %v",
			utils.GetFnName(), newAccessTime, vS.volumeName, inodeNumber)
	}

	// Set size, if present in the map
	size, ok := stat[StatSize]
	if ok {
		err = vS.inodeVolumeHandle.SetSize(inodeNumber, size)
		if err != nil {
			logger.ErrorWithError(err)
			return err
		}
	}

	return
}
file 3977 // have changed so it cannot be altered by a client (some security 3978 // software depends on this) 3979 ctime, ok := stat[StatCTime] 3980 if ok { 3981 newAccessTime := time.Unix(0, int64(ctime)) 3982 logger.Infof("%s: ignoring attempt to change ctime to %v on volume '%s' inode %v", 3983 utils.GetFnName(), newAccessTime, vS.volumeName, inodeNumber) 3984 } 3985 3986 // Set size, if present in the map 3987 size, ok := stat[StatSize] 3988 if ok { 3989 err = vS.inodeVolumeHandle.SetSize(inodeNumber, size) 3990 if err != nil { 3991 logger.ErrorWithError(err) 3992 return err 3993 } 3994 } 3995 3996 return 3997 } 3998 3999 func (vS *volumeStruct) SetXAttr(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, streamName string, value []byte, flags int) (err error) { 4000 startTime := time.Now() 4001 defer func() { 4002 globals.SetXAttrUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 4003 if err != nil { 4004 globals.SetXAttrErrors.Add(1) 4005 } 4006 }() 4007 4008 vS.jobRWMutex.RLock() 4009 defer vS.jobRWMutex.RUnlock() 4010 4011 inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil) 4012 if err != nil { 4013 return 4014 } 4015 err = inodeLock.WriteLock() 4016 if err != nil { 4017 return 4018 } 4019 defer inodeLock.Unlock() 4020 4021 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK, 4022 inode.NoOverride) { 4023 err = blunder.NewError(blunder.NotFoundError, "ENOENT") 4024 return 4025 } 4026 if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK, 4027 inode.OwnerOverride) { 4028 err = blunder.NewError(blunder.PermDeniedError, "EACCES") 4029 return 4030 } 4031 4032 switch flags { 4033 case SetXAttrCreateOrReplace: 4034 break 4035 case SetXAttrCreate: 4036 _, err = vS.GetXAttr(userID, groupID, otherGroupIDs, inodeNumber, streamName) 4037 if err == nil { 4038 return blunder.AddError(err, 
blunder.FileExistsError) 4039 } 4040 case SetXAttrReplace: 4041 _, err = vS.GetXAttr(userID, groupID, otherGroupIDs, inodeNumber, streamName) 4042 if err != nil { 4043 return blunder.AddError(err, blunder.StreamNotFound) 4044 } 4045 default: 4046 return blunder.AddError(err, blunder.InvalidArgError) 4047 } 4048 4049 err = vS.inodeVolumeHandle.PutStream(inodeNumber, streamName, value) 4050 if err != nil { 4051 logger.ErrorfWithError(err, "Failed to set XAttr %v to inode %v", streamName, inodeNumber) 4052 } 4053 4054 vS.untrackInFlightFileInodeData(inodeNumber, false) 4055 4056 return 4057 } 4058 4059 func (vS *volumeStruct) StatVfs() (statVFS StatVFS, err error) { 4060 startTime := time.Now() 4061 defer func() { 4062 globals.StatVfsUsec.Add(uint64(time.Since(startTime) / time.Microsecond)) 4063 if err != nil { 4064 globals.StatVfsErrors.Add(1) 4065 } 4066 }() 4067 4068 vS.jobRWMutex.RLock() 4069 defer vS.jobRWMutex.RUnlock() 4070 4071 statVFS = make(map[StatVFSKey]uint64) 4072 4073 statVFS[StatVFSFilesystemID] = vS.inodeVolumeHandle.GetFSID() 4074 statVFS[StatVFSBlockSize] = vS.reportedBlockSize 4075 statVFS[StatVFSFragmentSize] = vS.reportedFragmentSize 4076 statVFS[StatVFSTotalBlocks] = vS.reportedNumBlocks 4077 statVFS[StatVFSFreeBlocks] = vS.reportedNumBlocks 4078 statVFS[StatVFSAvailBlocks] = vS.reportedNumBlocks 4079 statVFS[StatVFSTotalInodes] = vS.reportedNumInodes 4080 statVFS[StatVFSFreeInodes] = vS.reportedNumInodes 4081 statVFS[StatVFSAvailInodes] = vS.reportedNumInodes 4082 statVFS[StatVFSMountFlags] = 0 4083 statVFS[StatVFSMaxFilenameLen] = FileNameMax 4084 4085 return statVFS, nil 4086 } 4087 4088 func (vS *volumeStruct) Symlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string, target string) (symlinkInodeNumber inode.InodeNumber, err error) { 4089 startTime := time.Now() 4090 defer func() { 4091 globals.SymlinkUsec.Add(uint64(time.Since(startTime) / 
time.Microsecond))
		if err != nil {
			globals.SymlinkErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	// Reject over-long names/targets before creating anything
	err = validateBaseName(basename)
	if err != nil {
		return
	}

	err = validateFullPath(target)
	if err != nil {
		return
	}

	// Mode for symlinks defaults to rwxrwxrwx, i.e. inode.PosixModePerm
	symlinkInodeNumber, err = vS.inodeVolumeHandle.CreateSymlink(target, inode.PosixModePerm, userID, groupID)
	if err != nil {
		return
	}

	// Lock the parent directory before the access checks and the Link()
	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.WriteLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {

		// Parent is gone; clean up the symlink inode we already created
		destroyErr := vS.inodeVolumeHandle.Destroy(symlinkInodeNumber)
		if destroyErr != nil {
			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Access(F_OK) in fs.Symlink", symlinkInodeNumber)
		}
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
		inode.NoOverride) {

		// Caller may not create entries in the parent; clean up the symlink inode
		destroyErr := vS.inodeVolumeHandle.Destroy(symlinkInodeNumber)
		if destroyErr != nil {
			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Access(W_OK|X_OK) in fs.Symlink", symlinkInodeNumber)
		}
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	err = vS.inodeVolumeHandle.Link(inodeNumber, basename, symlinkInodeNumber, false)
	if err != nil {
		destroyErr := vS.inodeVolumeHandle.Destroy(symlinkInodeNumber)
		if destroyErr != nil {
			logger.WarnfWithError(destroyErr, "couldn't destroy inode %v after failed Link() in fs.Symlink", symlinkInodeNumber)
		}
		return
	}

	return
}

// Unlink removes the directory entry basename from directory inodeNumber,
// destroying the referenced (non-directory) inode if its link count drops
// to zero. Directories are rejected with EISDIR (use Rmdir instead).
func (vS *volumeStruct) Unlink(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, basename string) (err error) {
	startTime := time.Now()
	defer func() {
		globals.UnlinkUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		if err != nil {
			globals.UnlinkErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	// A single CallerID lets the parent and target locks be held by the same caller
	callerID := dlm.GenerateCallerID()
	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, callerID)
	if err != nil {
		return
	}
	err = inodeLock.WriteLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK|inode.X_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	basenameInodeNumber, err := vS.inodeVolumeHandle.Lookup(inodeNumber, basename)
	if nil != err {
		return
	}

	// Lock the target as well before mutating the directory
	basenameInodeLock, err := vS.inodeVolumeHandle.InitInodeLock(basenameInodeNumber, callerID)
	if err != nil {
		return
	}
	err = basenameInodeLock.WriteLock()
	if err != nil {
		return
	}
	defer basenameInodeLock.Unlock()

	err = vS.unlinkActual(inodeNumber, basename, basenameInodeNumber)
	return
}

// unlinkActual performs the unlink of basename from inodeNumber once all
// necessary locks are held, refusing to unlink directories and destroying
// the target inode when inode.Unlink() reports it is no longer referenced.
func (vS *volumeStruct) unlinkActual(inodeNumber inode.InodeNumber, basename string, basenameInodeNumber inode.InodeNumber) (err error) {
	var (
		basenameInodeType    inode.InodeType
		toDestroyInodeNumber inode.InodeNumber
	)

	basenameInodeType, err = vS.inodeVolumeHandle.GetType(basenameInodeNumber)
	if nil != err {
		return
	}

	// Directories must be removed via Rmdir(), never Unlink()
	if inode.DirType == basenameInodeType {
		err = fmt.Errorf("Unlink() called on a Directory")
		err = blunder.AddError(err, blunder.IsDirError)
		return
	}

	toDestroyInodeNumber, err = vS.inodeVolumeHandle.Unlink(inodeNumber, basename, false)
	if nil != err {
		return
	}

	// A non-zero toDestroyInodeNumber means the link count hit zero:
	// cancel any pending in-flight flush and reclaim the inode's storage
	if inode.InodeNumber(0) != toDestroyInodeNumber {
		vS.untrackInFlightFileInodeData(basenameInodeNumber, false)
		err = vS.inodeVolumeHandle.Destroy(toDestroyInodeNumber)
	}

	return
}

// VolumeName returns the name of the volume served by this volumeStruct.
func (vS *volumeStruct) VolumeName() (volumeName string) {
	startTime := time.Now()

	volumeName = vS.volumeName
	globals.VolumeNameUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
	return
}

// Write writes buf to the file inode at the given offset and returns the
// number of bytes written. On success the inode is registered with the
// in-flight tracker so a Flush() is eventually scheduled.
func (vS *volumeStruct) Write(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, offset uint64, buf []byte, profiler *utils.Profiler) (size uint64, err error) {
	startTime := time.Now()
	defer func() {
		globals.WriteUsec.Add(uint64(time.Since(startTime) / time.Microsecond))
		globals.WriteBytes.Add(size)
		if err != nil {
			globals.WriteErrors.Add(1)
		}
	}()

	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	logger.Tracef("fs.Write(): starting volume '%s' inode %v offset %v len %v",
		vS.volumeName, inodeNumber, offset, len(buf))

	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.WriteLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
		inode.OwnerOverride) {
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	profiler.AddEventNow("before inode.Write()")
	err = vS.inodeVolumeHandle.Write(inodeNumber, offset, buf, profiler)
	profiler.AddEventNow("after inode.Write()")
	// write to Swift presumably succeeds or fails as a whole
	if err != nil {
		return 0, err
	}

	logger.Tracef("fs.Write(): tracking write volume '%s' inode %v", vS.volumeName, inodeNumber)
	vS.trackInFlightFileInodeData(inodeNumber)
	size = uint64(len(buf))

	return
}

// Wrote records extents written directly to the object store (a PUT to
// containerName/objectName) at the supplied file/object offsets, first
// flushing and untracking any in-flight buffered data for the inode.
func (vS *volumeStruct) Wrote(userID inode.InodeUserID, groupID inode.InodeGroupID, otherGroupIDs []inode.InodeGroupID, inodeNumber inode.InodeNumber, containerName string, objectName string, fileOffset []uint64, objectOffset []uint64, length []uint64, wroteTime uint64) (err error) {
	vS.jobRWMutex.RLock()
	defer vS.jobRWMutex.RUnlock()

	inodeLock, err := vS.inodeVolumeHandle.InitInodeLock(inodeNumber, nil)
	if err != nil {
		return
	}
	err = inodeLock.WriteLock()
	if err != nil {
		return
	}
	defer inodeLock.Unlock()

	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.F_OK,
		inode.NoOverride) {
		err = blunder.NewError(blunder.NotFoundError, "ENOENT")
		return
	}
	if !vS.inodeVolumeHandle.Access(inodeNumber, userID, groupID, otherGroupIDs, inode.W_OK,
		inode.OwnerOverride) {
		err = blunder.NewError(blunder.PermDeniedError, "EACCES")
		return
	}

	err = vS.inodeVolumeHandle.Flush(inodeNumber, false)
	vS.untrackInFlightFileInodeData(inodeNumber, false)
	if err != nil {
		// BUGFIX: this Flush() error was previously overwritten (silently
		// dropped) by the inode.Wrote() call below; recording extents on top
		// of an un-flushed inode would be unsafe, so fail here instead
		return
	}

	// wroteTime arrives as nanoseconds since the Unix epoch
	inodeWroteTime := time.Unix(0, int64(wroteTime))

	err = vS.inodeVolumeHandle.Wrote(inodeNumber, containerName, objectName, fileOffset, objectOffset, length, inodeWroteTime, true)

	return // err, as set by inode.Wrote(), is sufficient
}

// validateBaseName returns a NameTooLongError-annotated error if baseName
// exceeds FileNameMax; nil otherwise.
func validateBaseName(baseName string) (err error) {
	// Make sure the file baseName is not too long
	baseLen := len(baseName)
	if baseLen > FileNameMax {
		err = fmt.Errorf("%s: basename is too long. Length %v, max %v", utils.GetFnName(), baseLen, FileNameMax)
		logger.ErrorWithError(err)
		return blunder.AddError(err, blunder.NameTooLongError)
	}
	return
}

// validateFullPath returns a NameTooLongError-annotated error if fullPath
// exceeds FilePathMax; nil otherwise.
func validateFullPath(fullPath string) (err error) {
	pathLen := len(fullPath)
	if pathLen > FilePathMax {
		err = fmt.Errorf("%s: fullpath is too long. Length %v, max %v", utils.GetFnName(), pathLen, FilePathMax)
		logger.ErrorWithError(err)
		return blunder.AddError(err, blunder.NameTooLongError)
	}
	return
}

// revSplitPath cleans fullpath and returns its segments in reverse order
// (deepest component first). The root path ("/" or "") yields an empty slice.
func revSplitPath(fullpath string) []string {
	// BUGFIX: TrimLeft strips ALL leading slashes where the previous
	// TrimPrefix stripped only one, so inputs like "//a/b" no longer
	// produce an empty [0] element after the Split below
	trimmed := strings.TrimLeft(fullpath, "/")
	if trimmed == "" {
		// path.Clean("") = ".", which is not useful
		return []string{}
	}

	segments := strings.Split(path.Clean(trimmed), "/")
	slen := len(segments)
	for i := 0; i < slen/2; i++ {
		segments[i], segments[slen-i-1] = segments[slen-i-1], segments[i]
	}
	return segments
}

// Utility function to unlink a particular file or empty subdirectory,
// destroying it if it is no longer referenced.
//
// This function checks that the directory is empty.
//
// The caller of this function must hold appropriate locks.
//
// obstacleInodeNumber must refer to an existing file or directory
// that is (a) already part of the directory tree and (b) not the root
// directory.
4383 func (vS *volumeStruct) removeObstacleToObjectPut(callerID dlm.CallerID, dirInodeNumber inode.InodeNumber, obstacleName string, obstacleInodeNumber inode.InodeNumber) (err error) { 4384 var ( 4385 fileType inode.InodeType 4386 numEntries uint64 4387 statResult Stat 4388 toDestroyInodeNumber inode.InodeNumber 4389 ) 4390 4391 statResult, err = vS.getstatHelper(obstacleInodeNumber, callerID) 4392 if err != nil { 4393 return 4394 } 4395 4396 fileType = inode.InodeType(statResult[StatFType]) 4397 if fileType == inode.FileType || fileType == inode.SymlinkType { 4398 // Files and symlinks can always, barring errors, be unlinked 4399 toDestroyInodeNumber, err = vS.inodeVolumeHandle.Unlink(dirInodeNumber, obstacleName, false) 4400 if err != nil { 4401 return 4402 } 4403 } else if fileType == inode.DirType { 4404 numEntries, err = vS.inodeVolumeHandle.NumDirEntries(obstacleInodeNumber) 4405 if err != nil { 4406 return 4407 } 4408 if numEntries >= 3 { 4409 // We're looking at a pre-existing, user-visible directory 4410 // that's linked into the directory structure, so we've 4411 // got at least two entries, namely "." and ".." 4412 // 4413 // If there's a third, then the directory is non-empty. 4414 err = blunder.NewError(blunder.NotEmptyError, "%s is a non-empty directory", obstacleName) 4415 return 4416 } else { 4417 // We don't want to call Rmdir() here since 4418 // that function (a) grabs locks, (b) checks 4419 // that it's a directory and is empty, then 4420 // (c) calls Unlink() and Destroy(). 4421 // 4422 // We already have the locks and we've already 4423 // checked that it's empty, so let's just get 4424 // down to it. 
4425 toDestroyInodeNumber, err = vS.inodeVolumeHandle.Unlink(dirInodeNumber, obstacleName, false) 4426 if err != nil { 4427 return 4428 } 4429 } 4430 } 4431 4432 if inode.InodeNumber(0) != toDestroyInodeNumber { 4433 err = vS.inodeVolumeHandle.Destroy(toDestroyInodeNumber) 4434 } 4435 4436 return 4437 } 4438 4439 // Utility function to append entries to reply 4440 func appendReadPlanEntries(readPlan []inode.ReadPlanStep, readRangeOut *[]inode.ReadPlanStep) (numEntries uint64) { 4441 for i := range readPlan { 4442 entry := inode.ReadPlanStep{ObjectPath: readPlan[i].ObjectPath, Offset: readPlan[i].Offset, Length: readPlan[i].Length} 4443 *readRangeOut = append(*readRangeOut, entry) 4444 numEntries++ 4445 } 4446 return 4447 }