github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/fs/fs.go (about) 1 // Package fs provides mountpath and FQN abstractions and methods to resolve/map stored content 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package fs 6 7 import ( 8 "fmt" 9 "math" 10 "os" 11 "path/filepath" 12 "sort" 13 "strings" 14 "sync" 15 ratomic "sync/atomic" 16 "syscall" 17 "time" 18 19 "github.com/NVIDIA/aistore/api/apc" 20 "github.com/NVIDIA/aistore/cmn" 21 "github.com/NVIDIA/aistore/cmn/atomic" 22 "github.com/NVIDIA/aistore/cmn/cos" 23 "github.com/NVIDIA/aistore/cmn/debug" 24 "github.com/NVIDIA/aistore/cmn/fname" 25 "github.com/NVIDIA/aistore/cmn/nlog" 26 "github.com/NVIDIA/aistore/ios" 27 "github.com/OneOfOne/xxhash" 28 ) 29 30 const bidUnknownTTL = 2 * time.Minute // comment below; TODO: unify and move to config along w/ lom cache 31 32 const nodeXattrID = "user.ais.daemon_id" 33 34 // enum Mountpath.Flags 35 const ( 36 FlagBeingDisabled uint64 = 1 << iota 37 FlagBeingDetached 38 ) 39 40 const FlagWaitingDD = FlagBeingDisabled | FlagBeingDetached 41 42 // Terminology: 43 // - a mountpath is equivalent to (configurable) fspath - both terms are used interchangeably; 44 // - each mountpath is, simply, a local directory that is serviced by a local filesystem; 45 // - there's a 1-to-1 relationship between a mountpath and a local filesystem 46 // (different mountpaths map onto different filesystems, and vise versa); 47 // - mountpaths of the form <filesystem-mountpoint>/a/b/c are supported. 
48 49 type ( 50 Mountpath struct { 51 lomCaches cos.MultiSyncMap // LOM caches 52 info string 53 Path string // clean path 54 Label ios.Label // (disk sharing; help resolve lsblk; storage class; user-defined grouping) 55 cos.FS // underlying filesystem 56 Disks []string // owned disks (ios.FsDisks map => slice) 57 flags uint64 // bit flags (set/get atomic) 58 PathDigest uint64 // (HRW logic) 59 capacity Capacity 60 } 61 MPI map[string]*Mountpath 62 63 MFS struct { 64 ios ios.IOS 65 66 // fsIDs is set in which we store fsids of mountpaths. This allows for 67 // determining if there are any duplications of file system - we allow 68 // only one mountpath per file system. 69 fsIDs map[cos.FsID]string 70 71 // mountpaths 72 available ratomic.Pointer[MPI] 73 disabled ratomic.Pointer[MPI] 74 75 // capacity 76 cs CapStatus 77 csExpires atomic.Int64 78 totalSize atomic.Uint64 79 80 mu sync.Mutex 81 } 82 CapStatus struct { 83 // config 84 HighWM int64 85 OOS int64 86 // metrics 87 TotalUsed uint64 // bytes 88 TotalAvail uint64 // bytes 89 PctAvg int32 // average used (%) 90 PctMax int32 // max used (%) 91 PctMin int32 // max used (%) 92 } 93 ) 94 95 var mfs *MFS // singleton (target only) 96 97 /////////////// 98 // Mountpath // 99 /////////////// 100 101 func NewMountpath(mpath string, label ios.Label) (*Mountpath, error) { 102 cleanMpath, err := cmn.ValidateMpath(mpath) 103 if err != nil { 104 return nil, err 105 } 106 if err = cos.Stat(cleanMpath); err != nil { 107 return nil, cos.NewErrNotFound(nil, "mountpath "+mpath) 108 } 109 mi := &Mountpath{ 110 Path: cleanMpath, 111 Label: label, 112 PathDigest: xxhash.Checksum64S(cos.UnsafeB(cleanMpath), cos.MLCG32), 113 } 114 err = mi.resolveFS() 115 return mi, err 116 } 117 118 // flags 119 func (mi *Mountpath) setFlags(flags uint64) (ok bool) { 120 return cos.SetfAtomic(&mi.flags, flags) 121 } 122 123 func (mi *Mountpath) IsAnySet(flags uint64) bool { 124 return cos.IsAnySetfAtomic(&mi.flags, flags) 125 } 126 127 func (mi 
*Mountpath) String() string { 128 s := mi.Label.ToLog() 129 if mi.info == "" { 130 switch len(mi.Disks) { 131 case 0: 132 mi.info = fmt.Sprintf("mp[%s, fs=%s%s]", mi.Path, mi.Fs, s) 133 case 1: 134 mi.info = fmt.Sprintf("mp[%s, %s%s]", mi.Path, mi.Disks[0], s) 135 default: 136 mi.info = fmt.Sprintf("mp[%s, %v%s]", mi.Path, mi.Disks, s) 137 } 138 } 139 if !mi.IsAnySet(FlagWaitingDD) { 140 return mi.info 141 } 142 l := len(mi.info) 143 return mi.info[:l-1] + ", waiting-dd]" 144 } 145 146 func (mi *Mountpath) LomCache(idx int) *sync.Map { return mi.lomCaches.Get(idx) } 147 148 func LcacheIdx(digest uint64) int { return int(digest & cos.MultiSyncMapMask) } 149 150 func (mi *Mountpath) IsIdle(config *cmn.Config) bool { 151 curr := mfs.ios.GetMpathUtil(mi.Path) 152 return curr >= 0 && curr < config.Disk.DiskUtilLowWM 153 } 154 155 func (mi *Mountpath) CreateMissingBckDirs(bck *cmn.Bck) (err error) { 156 for contentType := range CSM.m { 157 dir := mi.MakePathCT(bck, contentType) 158 if err = cos.Stat(dir); err == nil { 159 continue 160 } 161 if err = cos.CreateDir(dir); err != nil { 162 return 163 } 164 } 165 return 166 } 167 168 func (mi *Mountpath) backupAtmost(from, backup string, bcnt, atMost int) (newBcnt int) { 169 var ( 170 fromPath = filepath.Join(mi.Path, from) 171 backupPath = filepath.Join(mi.Path, backup) 172 ) 173 os.Remove(backupPath) 174 newBcnt = bcnt 175 if bcnt >= atMost { 176 return 177 } 178 if err := cos.Stat(fromPath); err != nil { 179 return 180 } 181 if err := os.Rename(fromPath, backupPath); err != nil { 182 nlog.Errorln(err) 183 os.Remove(fromPath) 184 } else { 185 newBcnt = bcnt + 1 186 } 187 return 188 } 189 190 func (mi *Mountpath) SetDaemonIDXattr(tid string) error { 191 cos.Assert(tid != "") 192 // Validate if mountpath already has daemon ID set. 
193 mpathDaeID, err := _loadXattrID(mi.Path) 194 if err != nil { 195 return err 196 } 197 if mpathDaeID == tid { 198 return nil 199 } 200 if mpathDaeID != "" && mpathDaeID != tid { 201 return &ErrStorageIntegrity{ 202 Code: SieMpathIDMismatch, 203 Msg: fmt.Sprintf("target ID mismatch: %q vs %q(%q)", tid, mpathDaeID, mi), 204 } 205 } 206 return SetXattr(mi.Path, nodeXattrID, []byte(tid)) 207 } 208 209 // make-path methods 210 211 func (mi *Mountpath) makePathBuf(bck *cmn.Bck, contentType string, extra int) (buf []byte) { 212 var provLen, nsLen, bckNameLen, ctLen int 213 if contentType != "" { 214 debug.Assert(len(contentType) == contentTypeLen) 215 debug.Assert(bck.Props == nil || bck.Props.BID != 0) 216 ctLen = 1 + 1 + contentTypeLen 217 } 218 if !bck.Ns.IsGlobal() { 219 nsLen = 1 220 if bck.Ns.IsRemote() { 221 nsLen += 1 + len(bck.Ns.UUID) 222 } 223 nsLen += 1 + len(bck.Ns.Name) 224 } 225 if bck.Name != "" { 226 bckNameLen = 1 + len(bck.Name) 227 } 228 provLen = 1 + 1 + len(bck.Provider) 229 buf = make([]byte, 0, len(mi.Path)+provLen+nsLen+bckNameLen+ctLen+extra) 230 buf = append(buf, mi.Path...) 231 buf = append(buf, filepath.Separator, prefProvider) 232 buf = append(buf, bck.Provider...) 233 if nsLen > 0 { 234 buf = append(buf, filepath.Separator) 235 if bck.Ns.IsRemote() { 236 buf = append(buf, prefNsUUID) 237 buf = append(buf, bck.Ns.UUID...) 238 } 239 buf = append(buf, prefNsName) 240 buf = append(buf, bck.Ns.Name...) 241 } 242 if bckNameLen > 0 { 243 buf = append(buf, filepath.Separator) 244 buf = append(buf, bck.Name...) 245 } 246 if ctLen > 0 { 247 buf = append(buf, filepath.Separator, prefCT) 248 buf = append(buf, contentType...) 
249 } 250 return 251 } 252 253 func (mi *Mountpath) MakePathBck(bck *cmn.Bck) string { 254 buf := mi.makePathBuf(bck, "", 0) 255 return cos.UnsafeS(buf) 256 } 257 258 func (mi *Mountpath) MakePathCT(bck *cmn.Bck, contentType string) string { 259 debug.Assert(contentType != "") 260 buf := mi.makePathBuf(bck, contentType, 0) 261 return cos.UnsafeS(buf) 262 } 263 264 func (mi *Mountpath) MakePathFQN(bck *cmn.Bck, contentType, objName string) string { 265 debug.Assert(contentType != "" && objName != "") 266 buf := mi.makePathBuf(bck, contentType, 1+len(objName)) 267 buf = append(buf, filepath.Separator) 268 buf = append(buf, objName...) 269 return cos.UnsafeS(buf) 270 } 271 272 func (mi *Mountpath) makeDelPathBck(bck *cmn.Bck) string { 273 return mi.MakePathBck(bck) 274 } 275 276 // Creates all CT directories for a given (mountpath, bck) - NOTE handling of empty dirs 277 func (mi *Mountpath) createBckDirs(bck *cmn.Bck, nilbmd bool) (int, error) { 278 var num int 279 for contentType := range CSM.m { 280 dir := mi.MakePathCT(bck, contentType) 281 if err := cos.Stat(dir); err == nil { 282 if nilbmd { 283 // a) loaded previous BMD version or b) failed to load any 284 // in both cases, BMD cannot be fully trusted, and so we ignore that fact 285 // that the directory exists 286 // (scenario: decommission without proper cleanup, followed by rejoin) 287 nlog.Errorf("Warning: %s bdir %s exists but local BMD is not the latest", bck, dir) 288 num++ 289 continue 290 } 291 names, empty, errEmpty := IsDirEmpty(dir) 292 if errEmpty != nil { 293 return num, errEmpty 294 } 295 if !empty { 296 err = fmt.Errorf("bucket %s: directory %s already exists and is not empty (%v...)", 297 bck, dir, names) 298 if contentType != WorkfileType { 299 return num, err 300 } 301 nlog.Errorln(err) 302 } 303 } else if err := cos.CreateDir(dir); err != nil { 304 return num, fmt.Errorf("bucket %s: failed to create directory %s: %w", bck, dir, err) 305 } 306 num++ 307 } 308 return num, nil 309 } 310 311 func 
(mi *Mountpath) _setDisks(fsdisks ios.FsDisks) { 312 mi.Disks = make([]string, len(fsdisks)) 313 var i int 314 for d := range fsdisks { 315 mi.Disks[i] = d 316 i++ 317 } 318 } 319 320 // available/used capacity 321 322 func (mi *Mountpath) getCapacity(config *cmn.Config, refresh bool) (c Capacity, err error) { 323 if !refresh { 324 c.Used = ratomic.LoadUint64(&mi.capacity.Used) 325 c.Avail = ratomic.LoadUint64(&mi.capacity.Avail) 326 c.PctUsed = ratomic.LoadInt32(&mi.capacity.PctUsed) 327 return 328 } 329 statfs := &syscall.Statfs_t{} 330 if err = syscall.Statfs(mi.Path, statfs); err != nil { 331 return 332 } 333 bused := statfs.Blocks - statfs.Bavail 334 pct := bused * 100 / statfs.Blocks 335 if pct >= uint64(config.Space.HighWM)-1 { 336 fpct := math.Ceil(float64(bused) * 100 / float64(statfs.Blocks)) 337 pct = uint64(fpct) 338 } 339 u := bused * uint64(statfs.Bsize) 340 ratomic.StoreUint64(&mi.capacity.Used, u) 341 c.Used = u 342 a := statfs.Bavail * uint64(statfs.Bsize) 343 ratomic.StoreUint64(&mi.capacity.Avail, a) 344 c.Avail = a 345 ratomic.StoreInt32(&mi.capacity.PctUsed, int32(pct)) 346 c.PctUsed = int32(pct) 347 return 348 } 349 350 // 351 // mountpath add/enable helpers - always call under mfs lock 352 // 353 354 func (mi *Mountpath) AddEnabled(tid string, avail MPI, config *cmn.Config) (err error) { 355 if err = mi._validate(avail, config); err != nil { 356 return 357 } 358 if err = mi._addEnabled(tid, avail, config); err == nil { 359 mfs.fsIDs[mi.FsID] = mi.Path 360 } 361 cos.ClearfAtomic(&mi.flags, FlagWaitingDD) 362 return 363 } 364 365 func (mi *Mountpath) AddDisabled(disabled MPI) { 366 cos.ClearfAtomic(&mi.flags, FlagWaitingDD) 367 disabled[mi.Path] = mi 368 mfs.fsIDs[mi.FsID] = mi.Path 369 } 370 371 // check: 372 // - duplication 373 // - disk sharing 374 // - no disks 375 func (mi *Mountpath) _validate(avail MPI, config *cmn.Config) error { 376 existingMi, ok := avail[mi.Path] 377 if ok { 378 return fmt.Errorf("duplicated mountpath %s (%s)", mi, 
existingMi) 379 } 380 otherMpath, ok := mfs.fsIDs[mi.FsID] 381 if ok { 382 if config.TestingEnv() || cmn.AllowSharedDisksAndNoDisks { 383 return nil 384 } 385 if !mi.Label.IsNil() { 386 nlog.Warningf("FsID %v shared between (labeled) %s and %q - proceeding anyway", mi.FsID, mi, otherMpath) 387 return nil 388 } 389 return fmt.Errorf("FsID %v: filesystem sharing is not allowed: %s vs %q", mi.FsID, mi, otherMpath) 390 } 391 // check nesting 392 l := len(mi.Path) 393 for mpath := range avail { 394 if err := cmn.IsNestedMpath(mi.Path, l, mpath); err != nil { 395 return err 396 } 397 } 398 return nil 399 } 400 401 func (mi *Mountpath) _addEnabled(tid string, avail MPI, config *cmn.Config) error { 402 disks, err := mfs.ios.AddMpath(mi.Path, mi.Fs, mi.Label, config) 403 if err != nil { 404 return err 405 } 406 if tid != "" && config.WritePolicy.MD != apc.WriteNever { 407 if err := mi.SetDaemonIDXattr(tid); err != nil { 408 return err 409 } 410 } 411 mi._setDisks(disks) 412 _ = mi.String() // assign mi.info if not yet 413 avail[mi.Path] = mi 414 return nil 415 } 416 417 // under lock: clones and adds self to available 418 func (mi *Mountpath) _cloneAddEnabled(tid string, config *cmn.Config) (err error) { 419 debug.Assert(!mi.IsAnySet(FlagWaitingDD)) // m.b. 
new 420 avail, disabled := Get() 421 if _, ok := disabled[mi.Path]; ok { 422 return fmt.Errorf("%s exists and is currently disabled (hint: did you want to enable it?)", mi) 423 } 424 425 // dd-transition 426 if ddmi, ok := avail[mi.Path]; ok && ddmi.IsAnySet(FlagWaitingDD) { 427 availableCopy := _cloneOne(avail) 428 nlog.Warningf("%s (%s): interrupting dd-transition - adding&enabling", mi, ddmi) 429 availableCopy[mi.Path] = mi 430 putAvailMPI(availableCopy) 431 return 432 } 433 434 // add new mp 435 if err = mi._validate(avail, config); err != nil { 436 return 437 } 438 availableCopy := _cloneOne(avail) 439 if err = mi.AddEnabled(tid, availableCopy, config); err == nil { 440 putAvailMPI(availableCopy) 441 } 442 return 443 } 444 445 func (mi *Mountpath) ClearDD() { 446 cos.ClearfAtomic(&mi.flags, FlagWaitingDD) 447 } 448 449 func (mi *Mountpath) diskSize() (size uint64) { 450 numBlocks, _, blockSize, err := ios.GetFSStats(mi.Path) 451 if err != nil { 452 nlog.Errorln(mi.String(), "total disk size err:", err, strings.Repeat("<", 50)) 453 } else { 454 size = numBlocks * uint64(blockSize) 455 } 456 return 457 } 458 459 // bucket and bucket+prefix on-disk sizing (uses 'du') 460 func (mi *Mountpath) onDiskSize(bck *cmn.Bck, prefix string) (uint64, error) { 461 var ( 462 dirPath string 463 withNonDirPrefix bool 464 ) 465 if prefix == "" { 466 dirPath = mi.MakePathBck(bck) 467 } else { 468 dirPath = filepath.Join(mi.MakePathCT(bck, ObjectType), prefix) 469 if cos.Stat(dirPath) != nil { 470 dirPath += "*" // prefix is _not_ a directory 471 withNonDirPrefix = true // ok to fail matching 472 } 473 } 474 return ios.DirSizeOnDisk(dirPath, withNonDirPrefix) 475 } 476 477 func (mi *Mountpath) _cdf(tcdf *TargetCDF) *CDF { 478 cdf := tcdf.Mountpaths[mi.Path] 479 if cdf == nil { 480 tcdf.Mountpaths[mi.Path] = &CDF{} 481 cdf = tcdf.Mountpaths[mi.Path] 482 } 483 cdf.Disks = mi.Disks 484 cdf.FS = mi.FS 485 cdf.Label = mi.Label 486 cdf.Capacity = Capacity{} // reset (for caller to 
fill-in) 487 return cdf 488 } 489 490 // 491 // MFS & MPI 492 // 493 494 // create a new singleton 495 func New(num int) { 496 mfs = &MFS{fsIDs: make(map[cos.FsID]string, 10)} 497 mfs.ios = ios.New(num) 498 } 499 500 // used only in tests 501 func TestNew(iostater ios.IOS) { 502 const num = 10 503 mfs = &MFS{fsIDs: make(map[cos.FsID]string, num)} 504 if iostater == nil { 505 mfs.ios = ios.New(num) 506 } else { 507 mfs.ios = iostater 508 } 509 PutMPI(make(MPI, num), make(MPI, num)) 510 } 511 512 // `ios` delegations 513 func Clblk() { ios.Clblk(mfs.ios) } 514 func GetAllMpathUtils() (utils *ios.MpathUtil) { return mfs.ios.GetAllMpathUtils() } 515 func GetMpathUtil(mpath string) int64 { return mfs.ios.GetMpathUtil(mpath) } 516 func FillDiskStats(m ios.AllDiskStats) { mfs.ios.FillDiskStats(m) } 517 518 func putAvailMPI(available MPI) { mfs.available.Store(&available) } 519 func putDisabMPI(disabled MPI) { mfs.disabled.Store(&disabled) } 520 521 func PutMPI(available, disabled MPI) { 522 putAvailMPI(available) 523 putDisabMPI(disabled) 524 } 525 526 func MountpathsToLists() (mpl *apc.MountpathList) { 527 avail, disabled := Get() 528 mpl = &apc.MountpathList{ 529 Available: make([]string, 0, len(avail)), 530 WaitingDD: make([]string, 0), 531 Disabled: make([]string, 0, len(disabled)), 532 } 533 for _, mi := range avail { 534 if mi.IsAnySet(FlagWaitingDD) { 535 mpl.WaitingDD = append(mpl.WaitingDD, mi.Path) 536 } else { 537 mpl.Available = append(mpl.Available, mi.Path) 538 } 539 } 540 for mpath := range disabled { 541 mpl.Disabled = append(mpl.Disabled, mpath) 542 } 543 sort.Strings(mpl.Available) 544 sort.Strings(mpl.WaitingDD) 545 sort.Strings(mpl.Disabled) 546 return 547 } 548 549 // NOTE: must be under mfs lock 550 func _cloneOne(mpis MPI) (clone MPI) { 551 clone = make(MPI, len(mpis)) 552 for mpath, mi := range mpis { 553 clone[mpath] = mi 554 } 555 return 556 } 557 558 // cloneMPI returns a shallow copy of the current (available, disabled) mountpaths 559 func 
cloneMPI() (availableCopy, disabledCopy MPI) { 560 avail, disabled := Get() 561 availableCopy = _cloneOne(avail) 562 disabledCopy = _cloneOne(disabled) 563 return availableCopy, disabledCopy 564 } 565 566 // used only in tests (compare with AddMpath below) 567 func Add(mpath, tid string) (mi *Mountpath, err error) { 568 mi, err = NewMountpath(mpath, ios.TestLabel) 569 if err != nil { 570 return 571 } 572 config := cmn.GCO.Get() 573 mfs.mu.Lock() 574 err = mi._cloneAddEnabled(tid, config) 575 mfs.mu.Unlock() 576 return 577 } 578 579 func AddMpath(tid, mpath string, label ios.Label, cb func()) (mi *Mountpath, err error) { 580 mi, err = NewMountpath(mpath, label) 581 if err != nil { 582 return 583 } 584 585 config := cmn.GCO.Get() 586 if config.TestingEnv() { 587 if err = config.LocalConfig.TestFSP.ValidateMpath(mi.Path); err != nil { 588 nlog.Errorln(err, "- proceeding anyway") 589 } 590 } 591 592 mfs.mu.Lock() 593 err = mi._cloneAddEnabled(tid, config) 594 if err == nil { 595 cb() 596 } 597 mfs.mu.Unlock() 598 599 if mi.Path != mpath { 600 nlog.Warningf("%s: clean path(%q) => %q", mi, mpath, mi.Path) 601 } 602 return 603 } 604 605 // (unit tests only - compare with EnableMpath below) 606 func Enable(mpath string) (enabledMpath *Mountpath, err error) { 607 var cleanMpath string 608 if cleanMpath, err = cmn.ValidateMpath(mpath); err != nil { 609 return 610 } 611 config := cmn.GCO.Get() 612 mfs.mu.Lock() 613 enabledMpath, err = enable(mpath, cleanMpath, "" /*tid*/, config) 614 mfs.mu.Unlock() 615 return 616 } 617 618 // Enable enables previously disabled mountpath. enabled is set to 619 // true if mountpath has been moved from disabled to available and exists is 620 // set to true if such mountpath even exists. 
621 func EnableMpath(mpath, tid string, cb func()) (enabledMpath *Mountpath, err error) { 622 var cleanMpath string 623 debug.Assert(tid != "") 624 if cleanMpath, err = cmn.ValidateMpath(mpath); err != nil { 625 return 626 } 627 config := cmn.GCO.Get() 628 mfs.mu.Lock() 629 enabledMpath, err = enable(mpath, cleanMpath, tid, config) 630 if err == nil { 631 cb() 632 } 633 mfs.mu.Unlock() 634 return 635 } 636 637 func enable(mpath, cleanMpath, tid string, config *cmn.Config) (enabledMpath *Mountpath, err error) { 638 avail, disabled := Get() 639 mi, ok := avail[cleanMpath] 640 641 // dd-transition 642 if ok { 643 debug.Assert(cleanMpath == mi.Path) 644 if _, ok = disabled[cleanMpath]; ok { 645 err = fmt.Errorf("FATAL: %s vs (%s, %s)", mi, avail, disabled) 646 nlog.Errorln(err) 647 debug.AssertNoErr(err) 648 return 649 } 650 if mi.IsAnySet(FlagWaitingDD) { 651 availableCopy := _cloneOne(avail) 652 mi, ok = availableCopy[cleanMpath] 653 debug.Assert(ok) 654 nlog.Warningln(mi.String()+":", "re-enabling during dd-transition") 655 cos.ClearfAtomic(&mi.flags, FlagWaitingDD) 656 enabledMpath = mi 657 putAvailMPI(availableCopy) 658 } else if cmn.Rom.FastV(4, cos.SmoduleFS) { 659 nlog.Infof("%s: %s is already available, nothing to do", tid, mi) 660 } 661 return 662 } 663 664 // re-enable 665 mi, ok = disabled[cleanMpath] 666 if !ok { 667 err = cmn.NewErrMountpathNotFound(mpath, "" /*fqn*/, false /*disabled*/) 668 return 669 } 670 debug.Assert(cleanMpath == mi.Path) 671 availableCopy, disabledCopy := cloneMPI() 672 mi, ok = disabledCopy[cleanMpath] 673 debug.Assert(ok) 674 if err = mi.AddEnabled(tid, availableCopy, config); err != nil { 675 return 676 } 677 enabledMpath = mi 678 delete(disabledCopy, cleanMpath) 679 PutMPI(availableCopy, disabledCopy) 680 return 681 } 682 683 // Remove removes mountpaths from the target's mountpaths. It searches 684 // for the mountpath in `available` and, if not found, in `disabled`. 
685 func Remove(mpath string, cb ...func()) (*Mountpath, error) { 686 cleanMpath, err := cmn.ValidateMpath(mpath) 687 if err != nil { 688 return nil, err 689 } 690 691 mfs.mu.Lock() 692 defer mfs.mu.Unlock() 693 694 // Clear target ID if set 695 if err := removeXattr(cleanMpath, nodeXattrID); err != nil { 696 return nil, err 697 } 698 avail, disabled := Get() 699 mi, exists := avail[cleanMpath] 700 if !exists { 701 if mi, exists = disabled[cleanMpath]; !exists { 702 return nil, cmn.NewErrMountpathNotFound(mpath, "" /*fqn*/, false /*disabled*/) 703 } 704 debug.Assert(cleanMpath == mi.Path) 705 disabledCopy := _cloneOne(disabled) 706 delete(disabledCopy, cleanMpath) 707 delete(mfs.fsIDs, mi.FsID) // optional, benign 708 putDisabMPI(disabledCopy) 709 return mi, nil 710 } 711 debug.Assert(cleanMpath == mi.Path) 712 713 if _, exists = disabled[cleanMpath]; exists { 714 err := fmt.Errorf("FATAL: %s vs (%s, %s)", mi, avail, disabled) 715 nlog.Errorln(err) 716 debug.AssertNoErr(err) 717 return nil, err 718 } 719 720 config := cmn.GCO.Get() 721 availableCopy := _cloneOne(avail) 722 mfs.ios.RemoveMpath(cleanMpath, config.TestingEnv()) 723 delete(availableCopy, cleanMpath) 724 delete(mfs.fsIDs, mi.FsID) 725 726 availCnt := len(availableCopy) 727 if availCnt == 0 { 728 nlog.Errorf("removed the last available mountpath %s", mi) 729 } else { 730 nlog.Infof("removed mountpath %s (remain available: %d)", mi, availCnt) 731 } 732 moveMarkers(availableCopy, mi) 733 putAvailMPI(availableCopy) 734 if availCnt > 0 && len(cb) > 0 { 735 cb[0]() 736 } 737 return mi, nil 738 } 739 740 // begin (disable | detach) transaction: CoW-mark the corresponding mountpath 741 func BeginDD(action string, flags uint64, mpath string) (mi *Mountpath, numAvail int, noResil bool, err error) { 742 var cleanMpath string 743 debug.Assert(cos.BitFlags(flags).IsAnySet(cos.BitFlags(FlagWaitingDD))) 744 if cleanMpath, err = cmn.ValidateMpath(mpath); err != nil { 745 return 746 } 747 mfs.mu.Lock() 748 mi, numAvail, 
noResil, err = begdd(action, flags, cleanMpath) 749 mfs.mu.Unlock() 750 return 751 } 752 753 // under lock 754 func begdd(action string, flags uint64, mpath string) (mi *Mountpath, numAvail int, noResil bool, err error) { 755 var ( 756 avail, disabled = Get() 757 exists bool 758 ) 759 // dd inactive 760 if _, exists = avail[mpath]; !exists { 761 noResil = true 762 if mi, exists = disabled[mpath]; !exists { 763 err = cmn.NewErrMountpathNotFound(mpath, "" /*fqn*/, false /*disabled*/) 764 return 765 } 766 if action == apc.ActMountpathDisable { 767 nlog.Infof("%s(%q) is already fully disabled - nothing to do", mi, action) 768 mi = nil 769 } 770 numAvail = len(avail) 771 return 772 } 773 // dd active 774 clone := _cloneOne(avail) 775 mi = clone[mpath] 776 ok := mi.setFlags(flags) 777 debug.Assert(ok, mi.String()) // under lock 778 putAvailMPI(clone) 779 numAvail = len(clone) - 1 780 return 781 } 782 783 // Disables a mountpath, i.e., removes it from usage but keeps in the volume 784 // (for possible future re-enablement). If successful, returns the disabled mountpath. 785 // Otherwise, returns nil (also in the case if the mountpath was already disabled). 
786 func Disable(mpath string, cb ...func()) (disabledMpath *Mountpath, err error) { 787 cleanMpath, err := cmn.ValidateMpath(mpath) 788 if err != nil { 789 return nil, err 790 } 791 792 mfs.mu.Lock() 793 defer mfs.mu.Unlock() 794 795 avail, disabled := Get() 796 if mi, ok := avail[cleanMpath]; ok { 797 debug.Assert(cleanMpath == mi.Path) 798 if _, ok = disabled[cleanMpath]; ok { 799 err = fmt.Errorf("FATAL: %s vs (%s, %s)", mi, avail, disabled) 800 nlog.Errorln(err) 801 debug.AssertNoErr(err) 802 return 803 } 804 availableCopy, disabledCopy := cloneMPI() 805 cos.ClearfAtomic(&mi.flags, FlagWaitingDD) 806 disabledCopy[cleanMpath] = mi 807 808 config := cmn.GCO.Get() 809 mfs.ios.RemoveMpath(cleanMpath, config.TestingEnv()) 810 delete(availableCopy, cleanMpath) 811 delete(mfs.fsIDs, mi.FsID) 812 moveMarkers(availableCopy, mi) 813 PutMPI(availableCopy, disabledCopy) 814 if l := len(availableCopy); l == 0 { 815 nlog.Errorf("disabled the last available mountpath %s", mi) 816 } else { 817 if len(cb) > 0 { 818 cb[0]() 819 } 820 nlog.Infof("disabled mountpath %s (%d remain%s active)", mi, l, cos.Plural(l)) 821 } 822 return mi, nil 823 } 824 825 if _, ok := disabled[cleanMpath]; ok { 826 return nil, nil // nothing to do 827 } 828 return nil, cmn.NewErrMountpathNotFound(mpath, "" /*fqn*/, false /*disabled*/) 829 } 830 831 func NumAvail() int { 832 avail := GetAvail() 833 return len(avail) 834 } 835 836 // returns both available and disabled mountpaths (compare with GetAvail) 837 func Get() (MPI, MPI) { 838 var ( 839 avail = mfs.available.Load() 840 disabled = mfs.disabled.Load() 841 ) 842 debug.Assert(avail != nil) 843 debug.Assert(disabled != nil) 844 return *avail, *disabled 845 } 846 847 func GetAvail() MPI { 848 avail := mfs.available.Load() 849 debug.Assert(avail != nil) 850 return *avail 851 } 852 853 func CreateBucket(bck *cmn.Bck, nilbmd bool) (errs []error) { 854 var ( 855 avail = GetAvail() 856 totalDirs = len(avail) * len(CSM.m) 857 totalCreatedDirs int 858 ) 859 
for _, mi := range avail { 860 num, err := mi.createBckDirs(bck, nilbmd) 861 if err != nil { 862 errs = append(errs, err) 863 } else { 864 totalCreatedDirs += num 865 } 866 } 867 debug.Assert(totalCreatedDirs == totalDirs || errs != nil) 868 return 869 } 870 871 // NOTE: caller must make sure to evict LOM cache 872 func DestroyBucket(op string, bck *cmn.Bck, bid uint64) (err error) { 873 var ( 874 n int 875 avail = GetAvail() 876 count = len(avail) 877 now time.Time 878 ) 879 for _, mi := range avail { 880 // normally, unique bucket ID (aka BID) must be known 881 // - i.e., non-zero (and unique); 882 // zero ID indicates that either we are in the middle of bucket 883 // creation OR the latter was interrupted (and txn-create aborted) - 884 // thus, prior to going ahead with deletion: 885 if bid == 0 { 886 bdir := mi.MakePathBck(bck) 887 if finfo, erc := os.Stat(bdir); erc == nil { 888 mtime := finfo.ModTime() 889 if now.IsZero() { 890 now = time.Now() 891 } 892 if mtime.After(now) || now.Sub(mtime) < bidUnknownTTL { 893 return fmt.Errorf("%s %q: unknown BID with %q age below ttl (%v)", op, bck, bdir, mtime) 894 } 895 } 896 } 897 898 dir := mi.makeDelPathBck(bck) 899 if errMv := mi.MoveToDeleted(dir); errMv != nil { 900 nlog.Errorf("%s %q: failed to rm dir %q: %v", op, bck, dir, errMv) 901 // TODO: call fshc 902 } else { 903 n++ 904 } 905 } 906 if n < count { 907 err = fmt.Errorf("%s %q: failed to destroy %d out of %d dirs", op, bck, count-n, count) 908 } 909 return 910 } 911 912 func RenameBucketDirs(bckFrom, bckTo *cmn.Bck) (err error) { 913 avail := GetAvail() 914 renamed := make([]*Mountpath, 0, len(avail)) 915 for _, mi := range avail { 916 fromPath := mi.makeDelPathBck(bckFrom) 917 toPath := mi.MakePathBck(bckTo) 918 919 // remove destination bucket directory before renaming 920 // (the operation will fail otherwise) 921 errRm := RemoveAll(toPath) 922 debug.AssertNoErr(errRm) 923 924 if err = os.Rename(fromPath, toPath); err != nil { 925 break 926 } 927 renamed 
= append(renamed, mi) 928 } 929 930 if err == nil { 931 return 932 } 933 for _, mi := range renamed { 934 fromPath := mi.MakePathBck(bckTo) 935 toPath := mi.MakePathBck(bckFrom) 936 if erd := os.Rename(fromPath, toPath); erd != nil { 937 nlog.Errorln(erd) 938 } 939 } 940 return 941 } 942 943 func moveMarkers(available MPI, from *Mountpath) { 944 var ( 945 fromPath = filepath.Join(from.Path, fname.MarkersDir) 946 finfos, err = os.ReadDir(fromPath) 947 ) 948 if err != nil { 949 if !os.IsNotExist(err) { 950 nlog.Errorf("Failed to read markers' dir %q: %v", fromPath, err) 951 } 952 return 953 } 954 if len(finfos) == 0 { 955 return // no markers, nothing to do 956 } 957 958 // NOTE: `from` path must no longer be in the available mountpaths 959 _, ok := available[from.Path] 960 debug.Assert(!ok, from.String()) 961 for _, mi := range available { 962 ok = true 963 for _, fi := range finfos { 964 debug.Assert(!fi.IsDir(), fname.MarkersDir+cos.PathSeparator+fi.Name()) // marker is a file 965 var ( 966 fromPath = filepath.Join(from.Path, fname.MarkersDir, fi.Name()) 967 toPath = filepath.Join(mi.Path, fname.MarkersDir, fi.Name()) 968 ) 969 _, _, err := cos.CopyFile(fromPath, toPath, nil, cos.ChecksumNone) 970 if err != nil && os.IsNotExist(err) { 971 nlog.Errorf("Failed to move marker %q to %q: %v)", fromPath, toPath, err) 972 ok = false 973 } 974 } 975 if ok { 976 break 977 } 978 } 979 from.ClearMDs(true /*inclBMD*/) 980 } 981 982 // load node ID 983 984 // traverses all mountpaths to load and validate node ID 985 func LoadNodeID(mpaths cos.StrKVs) (mDaeID string, err error) { 986 for mp := range mpaths { 987 daeID, err := _loadXattrID(mp) 988 if err != nil { 989 return "", err 990 } 991 if daeID == "" { 992 continue 993 } 994 if mDaeID != "" { 995 if mDaeID != daeID { 996 return "", &ErrStorageIntegrity{ 997 Code: SieMpathIDMismatch, 998 Msg: fmt.Sprintf("target ID mismatch: %q vs %q(%q)", mDaeID, daeID, mp), 999 } 1000 } 1001 continue 1002 } 1003 mDaeID = daeID 1004 } 1005 
return
}

// _loadXattrID reads the node-ID extended attribute (`nodeXattrID`) stored on
// the given mountpath and returns it as a string.
// A missing attribute is not an error: in that case both results are empty/nil.
// Any other GetXattr failure is returned as is.
func _loadXattrID(mpath string) (daeID string, err error) {
	b, err := GetXattr(mpath, nodeXattrID)
	if err == nil {
		daeID = string(b)
		return
	}
	if cos.IsErrXattrNotFound(err) {
		err = nil
	}
	return
}

/////////
// MPI //
/////////

// String formats the mountpath keys of the map as a slice.
// The keys are sorted first: map iteration order is random, and unsorted
// output would differ from call to call for the very same set of mountpaths.
func (mpi MPI) String() string {
	paths := mpi.toSlice()
	sort.Strings(paths)
	return fmt.Sprintf("%v", paths)
}

// toSlice returns the map keys (mountpath paths) in no particular order.
func (mpi MPI) toSlice() []string {
	var (
		paths = make([]string, len(mpi))
		idx   int
	)
	for key := range mpi {
		paths[idx] = key
		idx++
	}
	return paths
}

//
// capacity management/reporting
//

// ComputeDiskSize sums up `diskSize()` of all currently available mountpaths
// and stores the result as the total disk size (see GetDiskSize).
func ComputeDiskSize() {
	var (
		totalSize uint64
		avail     = GetAvail()
	)
	for _, mi := range avail {
		totalSize += mi.diskSize()
	}
	mfs.totalSize.Store(totalSize)
}

// GetDiskSize returns the total most recently stored by ComputeDiskSize.
func GetDiskSize() uint64 { return mfs.totalSize.Load() }

// OnDiskSize returns bucket (or bucket+prefix) on-disk size summed up across
// all available mountpaths.
// NOTE: all-or-nothing - if sizing fails on any single mountpath the function
// returns 0 rather than a partial sum (the failure is logged at verbosity 4).
func OnDiskSize(bck *cmn.Bck, prefix string) (size uint64) {
	avail := GetAvail()
	for _, mi := range avail {
		sz, err := mi.onDiskSize(bck, prefix)
		if err != nil {
			if cmn.Rom.FastV(4, cos.SmoduleFS) {
				nlog.Warningln("failed to 'du':", err, "["+mi.String(), bck.String(), prefix+"]")
			}
			return 0
		}
		size += sz
	}
	return
}

// cap status: get, refresh, periodic

// Cap returns a copy of the cached capacity status (last stored by CapRefresh).
// Each field is loaded atomically but separately from the others.
func Cap() (cs CapStatus) {
	// config
	cs.OOS = ratomic.LoadInt64(&mfs.cs.OOS)
	cs.HighWM = ratomic.LoadInt64(&mfs.cs.HighWM)
	// metrics
	cs.TotalUsed = ratomic.LoadUint64(&mfs.cs.TotalUsed)
	cs.TotalAvail = ratomic.LoadUint64(&mfs.cs.TotalAvail)
	cs.PctMin = ratomic.LoadInt32(&mfs.cs.PctMin)
	cs.PctAvg = ratomic.LoadInt32(&mfs.cs.PctAvg)
	cs.PctMax = ratomic.LoadInt32(&mfs.cs.PctMax)
	return
}

// NoneShared reports whether the number of registered filesystem IDs is at
// least `numMpaths`; CapRefresh uses it to decide that no two mountpaths
// share a filesystem.
func NoneShared(numMpaths int) bool { return len(mfs.fsIDs) >= numMpaths }

// CapRefresh sums up and computes %% capacities while skipping already
// _counted_ filesystems.
//
// Arguments:
//   - config: when nil, the current global config (cmn.GCO.Get()) is used;
//   - tcdf:   optional; when non-nil it is filled in with per-mountpath and
//     total capacity, and pruned of mountpaths that are no longer available.
//
// Returns:
//   - cs:     computed capacity status;
//   - second: cmn.ErrNoMountpaths when there are no available mountpaths, or
//     the error returned by a mountpath's getCapacity(); in both cases
//     the cached state is left unmodified;
//   - errCap: the result of cs.Err() (high watermark exceeded or OOS), if any.
func CapRefresh(config *cmn.Config, tcdf *TargetCDF) (cs CapStatus, _, errCap error) {
	var (
		fsIDs  []cos.FsID
		avail  = GetAvail()
		l      = len(avail)
		n      int // num different filesystems (<= len(mfs.fsIDs))
		unique bool
	)
	if l == 0 {
		if tcdf != nil {
			tcdf.Mountpaths = make(map[string]*CDF)
		}
		return cs, cmn.ErrNoMountpaths, nil
	}

	// fast path: available w/ no sharing
	fast := NoneShared(l)
	unique = fast

	if !fast {
		fsIDs = make([]cos.FsID, 0, l)
	}

	if config == nil {
		config = cmn.GCO.Get()
	}
	cs.HighWM, cs.OOS = config.Space.HighWM, config.Space.OOS
	cs.PctMin = 101 // greater than any valid percentage; lowered via min() below
	for _, mi := range avail {
		if !fast {
			fsIDs, unique = cos.AddUniqueFsID(fsIDs, mi.FsID)
		}
		if !unique {
			// (same fs across)
			if tcdf != nil {
				// result intentionally unused; the capacity for this mountpath
				// gets duplicated from its filesystem "sibling" (see _either below)
				_ = mi._cdf(tcdf)
			}
			continue
		}

		// this mountpath's cap
		c, err := mi.getCapacity(config, true)
		if err != nil {
			nlog.Errorln(mi.String()+":", err)
			return cs, err, nil
		}
		if tcdf != nil {
			cdf := mi._cdf(tcdf)
			cdf.Capacity = c
		}

		// recompute totals
		cs.TotalUsed += c.Used
		cs.TotalAvail += c.Avail
		cs.PctMax = max(cs.PctMax, c.PctUsed)
		cs.PctMin = min(cs.PctMin, c.PctUsed)
		n++
		cs.PctAvg += c.PctUsed
	}
	debug.Assert(cs.PctMin < 101)
	// average over the filesystems that were actually counted
	cs.PctAvg /= int32(n)

	errCap = cs.Err()

	// fill-in and prune
	if tcdf != nil {
		tcdf.PctMax, tcdf.PctAvg, tcdf.PctMin = cs.PctMax, cs.PctAvg, cs.PctMin
		tcdf.TotalUsed, tcdf.TotalAvail = cs.TotalUsed, cs.TotalAvail
		if errCap != nil {
			tcdf.CsErr = errCap.Error()
		}
		// prune detached and disabled, if any
		for mpath := range tcdf.Mountpaths {
			if _, ok := avail[mpath]; !ok {
				delete(tcdf.Mountpaths, mpath)
			}
		}
		// duplicate shared filesystem cap => (its mountpaths)
		if n < l {
			for mpath1, cdf1 := range tcdf.Mountpaths {
				for mpath2, cdf2 := range tcdf.Mountpaths {
					if mpath1 != mpath2 && cdf1.FS.Equal(cdf2.FS) {
						_either(cdf1, cdf2)
					}
				}
			}
		}
	}

	// update cached state
	ratomic.StoreInt64(&mfs.cs.HighWM, cs.HighWM)
	ratomic.StoreInt64(&mfs.cs.OOS, cs.OOS)
	ratomic.StoreUint64(&mfs.cs.TotalUsed, cs.TotalUsed)
	ratomic.StoreUint64(&mfs.cs.TotalAvail, cs.TotalAvail)
	ratomic.StoreInt32(&mfs.cs.PctMin, cs.PctMin)
	ratomic.StoreInt32(&mfs.cs.PctAvg, cs.PctAvg)
	ratomic.StoreInt32(&mfs.cs.PctMax, cs.PctMax)

	return cs, nil, errCap
}

// _either copies Capacity from whichever of the two CDFs has it (non-zero
// Used or Avail) to the one that does not (both Used and Avail are zero).
// No-op when both are set; when both are zero the copy changes nothing.
func _either(cdf1, cdf2 *CDF) {
	if cdf1.Capacity.Used == 0 && cdf1.Capacity.Avail == 0 {
		cdf1.Capacity = cdf2.Capacity
	} else if cdf2.Capacity.Used == 0 && cdf2.Capacity.Avail == 0 {
		cdf2.Capacity = cdf1.Capacity
	}
}

// CapPeriodic refreshes capacity status if (and only if) the previously
// computed one has expired, and then schedules the next expiration.
// Called only and exclusively by `stats.Trunner` providing `config.Periodic.StatsTime` tick.
//
// Returns zero values (updated == false) when called prior to the expiration
// time; otherwise, the results of CapRefresh, with `updated` set to true when
// the refresh did not fail.
func CapPeriodic(now int64, config *cmn.Config, tcdf *TargetCDF) (cs CapStatus, updated bool, err, errCap error) {
	if now < mfs.csExpires.Load() {
		return
	}
	// CapRefresh tolerates nil config but resolves it only locally; _next() below
	// dereferences config as well - hence, resolving it here, once, for both
	if config == nil {
		config = cmn.GCO.Get()
	}
	cs, err, errCap = CapRefresh(config, tcdf)
	updated = err == nil
	mfs.csExpires.Store(now + int64(cs._next(config)))
	return
}

// CapStatusGetWhat converts the cached capacity status (see Cap) to
// apc.CapacityInfo: used, total (used + avail), and average used percentage.
func CapStatusGetWhat() (fsInfo apc.CapacityInfo) {
	cs := Cap()
	fsInfo.Used = cs.TotalUsed
	fsInfo.Total = cs.TotalUsed + cs.TotalAvail
	fsInfo.PctUsed = float64(cs.PctAvg)
	return
}

///////////////
// CapStatus //
///////////////

// Err returns a "capacity exceeded" error when the max used percentage is
// above the high watermark or the OOS threshold; nil otherwise.
// note: conditioning on max, not avg
func (cs *CapStatus) Err() (err error) {
	oos := cs.IsOOS()
	if oos || int64(cs.PctMax) > cs.HighWM {
		err = cmn.NewErrCapExceeded(cs.TotalUsed, cs.TotalAvail+cs.TotalUsed, cs.HighWM, 0 /*cleanup wm*/, cs.PctMax, oos)
	}
	return
}

// IsOOS reports whether the max used percentage exceeds the configured OOS threshold.
func (cs *CapStatus) IsOOS() bool { return int64(cs.PctMax) > cs.OOS }

// IsNil reports whether both total used and total available are zero.
func (cs *CapStatus) IsNil() bool { return cs.TotalUsed == 0 && cs.TotalAvail == 0 }

// String formats totals (IEC units) and min/avg/max used percentages, with
// an ", OOS" or ", high-wm" suffix when the respective threshold is exceeded.
func (cs *CapStatus) String() (s string) {
	var (
		totalUsed  = cos.ToSizeIEC(int64(cs.TotalUsed), 1)
		totalAvail = cos.ToSizeIEC(int64(cs.TotalAvail), 1)
	)
	s = fmt.Sprintf("cap(used %s, avail %s [min=%d%%, avg=%d%%, max=%d%%]", totalUsed, totalAvail,
		cs.PctMin, cs.PctAvg, cs.PctMax)
	switch {
	case cs.IsOOS():
		s += ", OOS"
	case int64(cs.PctMax) > cs.HighWM:
		s += ", high-wm"
	}
	s += ")"
	return
}

// _next returns the time interval until the next CapRefresh().
// The higher the (max) utilization the shorter the interval:
//   - at or below `umin` (the smallest of HighWM-10, LowWM, and CleanupWM): `tmax` (LRU.CapacityUpdTime);
//   - at or above OOS-1: `tmin` (Periodic.StatsTime);
//   - in between: linearly interpolated between `tmax` and `tmin`.
func (cs *CapStatus) _next(config *cmn.Config) time.Duration {
	var (
		util = int64(cs.PctMax)
		umin = min(config.Space.HighWM-10, config.Space.LowWM)
		umax = config.Space.OOS
		tmax = config.LRU.CapacityUpdTime.D()
		tmin = config.Periodic.StatsTime.D()
	)
	umin = min(umin, config.Space.CleanupWM)
	if util <= umin {
		return tmax
	}
	if util >= umax-1 {
		return tmin
	}
	// here: umin < util < umax-1, and therefore umax-umin > 0
	ratio := (util - umin) * 100 / (umax - umin)
	return time.Duration(100-ratio)*(tmax-tmin)/100 + tmin
}