github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/space/cleanup.go (about) 1 // Package space provides storage cleanup and eviction functionality (the latter based on the 2 // least recently used cache replacement). It also serves as a built-in garbage-collection 3 // mechanism for orphaned workfiles. 4 /* 5 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 6 */ 7 package space 8 9 import ( 10 "fmt" 11 "os" 12 "path/filepath" 13 "sync" 14 "time" 15 16 "github.com/NVIDIA/aistore/api/apc" 17 "github.com/NVIDIA/aistore/cmn" 18 "github.com/NVIDIA/aistore/cmn/atomic" 19 "github.com/NVIDIA/aistore/cmn/cos" 20 "github.com/NVIDIA/aistore/cmn/debug" 21 "github.com/NVIDIA/aistore/cmn/nlog" 22 "github.com/NVIDIA/aistore/core" 23 "github.com/NVIDIA/aistore/core/meta" 24 "github.com/NVIDIA/aistore/fs" 25 "github.com/NVIDIA/aistore/ios" 26 "github.com/NVIDIA/aistore/stats" 27 "github.com/NVIDIA/aistore/xact" 28 "github.com/NVIDIA/aistore/xact/xreg" 29 ) 30 31 type ( 32 IniCln struct { 33 Config *cmn.Config 34 Xaction *XactCln 35 StatsT stats.Tracker 36 Buckets []cmn.Bck // optional list of specific buckets to cleanup 37 WG *sync.WaitGroup 38 } 39 XactCln struct { 40 xact.Base 41 } 42 ) 43 44 // private 45 type ( 46 // parent (contains mpath joggers) 47 clnP struct { 48 wg sync.WaitGroup 49 joggers map[string]*clnJ 50 ini IniCln 51 cs struct { 52 a fs.CapStatus // initial 53 b fs.CapStatus // capacity after removing 'deleted' 54 c fs.CapStatus // upon finishing 55 } 56 jcnt atomic.Int32 57 } 58 // clnJ represents a single cleanup context and a single /jogger/ 59 // that traverses and evicts a single given mountpath. 60 clnJ struct { 61 // runtime 62 oldWork []string 63 misplaced struct { 64 loms []*core.LOM 65 ec []*core.CT // EC slices and replicas without corresponding metafiles (CT FQN -> Meta FQN) 66 } 67 bck cmn.Bck 68 now int64 69 // init-time 70 p *clnP 71 ini *IniCln 72 stopCh chan struct{} 73 joggers map[string]*clnJ 74 mi *fs.Mountpath 75 config *cmn.Config 76 } 77 clnFactory struct { 78 xreg.RenewBase 79 xctn *XactCln 80 } 81 ) 82 83 // interface guard 84 var ( 85 _ xreg.Renewable = (*clnFactory)(nil) 86 _ core.Xact = (*XactCln)(nil) 87 ) 88 89 func (*XactCln) Run(*sync.WaitGroup) { debug.Assert(false) } 90 91 func (r *XactCln) Snap() (snap *core.Snap) { 92 snap = &core.Snap{} 93 r.ToSnap(snap) 94 95 snap.IdleX = r.IsIdle() 96 return 97 } 98 99 //////////////// 100 // clnFactory // 101 //////////////// 102 103 func (*clnFactory) New(args xreg.Args, _ *meta.Bck) xreg.Renewable { 104 return &clnFactory{RenewBase: xreg.RenewBase{Args: args}} 105 } 106 107 func (p *clnFactory) Start() error { 108 p.xctn = &XactCln{} 109 p.xctn.InitBase(p.UUID(), apc.ActStoreCleanup, nil) 110 return nil 111 } 112 113 func (*clnFactory) Kind() string { return apc.ActStoreCleanup } 114 func (p *clnFactory) Get() core.Xact { return p.xctn } 115 116 func (*clnFactory) WhenPrevIsRunning(prevEntry xreg.Renewable) (wpr xreg.WPR, err error) { 117 return xreg.WprUse, cmn.NewErrXactUsePrev(prevEntry.Get().String()) 118 } 119 120 func RunCleanup(ini *IniCln) fs.CapStatus { 121 var ( 122 xcln = ini.Xaction 123 config = cmn.GCO.Get() 124 availablePaths = fs.GetAvail() 125 num = len(availablePaths) 126 joggers = make(map[string]*clnJ, num) 127 parent = &clnP{joggers: joggers, ini: *ini} 128 ) 129 defer func() { 130 if ini.WG != nil { 131 ini.WG.Done() 132 } 133 }() 134 if num == 0 { 135 xcln.AddErr(cmn.ErrNoMountpaths, 0) 136 xcln.Finish() 137 return fs.CapStatus{} 138 } 139 for mpath, mi := range availablePaths { 140 joggers[mpath] = &clnJ{ 141 oldWork: make([]string, 0, 64), 142 stopCh: make(chan struct{}, 1), 143 mi: mi, 144 config: config, 145 ini: &parent.ini, 146 p: parent, 147 } 148 joggers[mpath].misplaced.loms = make([]*core.LOM, 0, 64) 149 joggers[mpath].misplaced.ec = make([]*core.CT, 0, 64) 150 } 151 parent.jcnt.Store(int32(len(joggers))) 152 providers := apc.Providers.ToSlice() 153 for _, j := range joggers { 154 parent.wg.Add(1) 155 j.joggers = joggers 156 go j.run(providers) 157 } 158 159 parent.cs.a = fs.Cap() 160 nlog.Infoln(xcln.Name(), "started: ", xcln, parent.cs.a.String()) 161 if ini.WG != nil { 162 ini.WG.Done() 163 ini.WG = nil 164 } 165 parent.wg.Wait() 166 167 for _, j := range joggers { 168 j.stop() 169 } 170 171 var err, errCap error 172 parent.cs.c, err, errCap = fs.CapRefresh(config, nil /*tcdf*/) 173 if err != nil { 174 xcln.AddErr(err) 175 } 176 if errCap != nil { 177 xcln.AddErr(errCap) 178 } 179 xcln.Finish() 180 nlog.Infoln(xcln.Name(), "finished:", errCap) 181 182 return parent.cs.c 183 } 184 185 func (p *clnP) rmMisplaced() bool { 186 var ( 187 g = xreg.GetRebMarked() 188 l = xreg.GetResilverMarked() 189 ) 190 if g.Xact == nil && l.Xact == nil && !g.Interrupted && !g.Restarted && !l.Interrupted { 191 return true 192 } 193 194 // log 195 var warn, info string 196 if p.cs.a.Err() != nil { 197 warn = fmt.Sprintf("%s: %s but not removing misplaced/obsolete copies: ", p.ini.Xaction, p.cs.a.String()) 198 } else { 199 warn = fmt.Sprintf("%s: not removing misplaced/obsolete copies: ", p.ini.Xaction) 200 } 201 switch { 202 case g.Xact != nil: 203 info = g.Xact.String() + " is running" 204 case g.Interrupted: 205 info = "rebalance interrupted" 206 case g.Restarted: 207 info = "node restarted" 208 case l.Xact != nil: 209 info = l.Xact.String() + " is running" 210 case l.Interrupted: 211 info = "resilver interrupted" 212 } 213 if p.cs.a.Err() != nil { 214 nlog.Errorln(warn + info) 215 } else { 216 nlog.Warningln(warn + info) 217 } 218 return false 219 } 220 221 ////////// 222 // clnJ // 223 ////////// 224 225 // mountpath cleanup j 226 227 func (j *clnJ) String() string { 228 return fmt.Sprintf("%s: jog-%s", j.ini.Xaction, j.mi) 229 } 230 231 func (j *clnJ) stop() { j.stopCh <- struct{}{} } 232 233 func (j *clnJ) run(providers []string) { 234 const f = "%s: freed space %s (not including removed 'deleted')" 235 var ( 236 size int64 237 err, erm error 238 ) 239 // globally 240 erm = j.removeDeleted() 241 if erm != nil { 242 nlog.Errorln(erm) 243 } 244 245 // traverse 246 if len(j.ini.Buckets) != 0 { 247 size, err = j.jogBcks(j.ini.Buckets) 248 } else { 249 size, err = j.jog(providers) 250 } 251 if err == nil { 252 err = erm 253 } 254 if err == nil { 255 if size != 0 { 256 nlog.Infof(f, j, cos.ToSizeIEC(size, 1)) 257 } 258 } else { 259 nlog.Errorf(f+", err: %v", j, cos.ToSizeIEC(size, 1), err) 260 } 261 j.p.wg.Done() 262 } 263 264 func (j *clnJ) jog(providers []string) (size int64, rerr error) { 265 for _, provider := range providers { // for each provider (NOTE: ordering is random) 266 var ( 267 sz int64 268 bcks []cmn.Bck 269 err error 270 opts = fs.WalkOpts{Mi: j.mi, Bck: cmn.Bck{Provider: provider, Ns: cmn.NsGlobal}} 271 ) 272 if bcks, err = fs.AllMpathBcks(&opts); err != nil { 273 nlog.Errorln(err) 274 if rerr == nil { 275 rerr = err 276 } 277 continue 278 } 279 if len(bcks) == 0 { 280 continue 281 } 282 sz, err = j.jogBcks(bcks) 283 size += sz 284 if err != nil && rerr == nil { 285 rerr = err 286 } 287 } 288 return 289 } 290 291 func (j *clnJ) jogBcks(bcks []cmn.Bck) (size int64, rerr error) { 292 bowner := core.T.Bowner() 293 for i := range bcks { // for each bucket under a given provider 294 var ( 295 err error 296 sz int64 297 bck = bcks[i] 298 b = meta.CloneBck(&bck) 299 ) 300 j.bck = bck 301 err = b.Init(bowner) 302 if err != nil { 303 if cmn.IsErrBckNotFound(err) || cmn.IsErrRemoteBckNotFound(err) { 304 const act = "delete non-existing" 305 if err = fs.DestroyBucket(act, &bck, 0 /*unknown BID*/); err == nil { 306 nlog.Infof("%s: %s %s", j, act, bck) 307 } else { 308 j.ini.Xaction.AddErr(err) 309 nlog.Errorf("%s %s: %v - skipping", j, act, err) 310 } 311 } else { 312 // TODO: config option to scrub `fs.AllMpathBcks` buckets 313 j.ini.Xaction.AddErr(err) 314 nlog.Errorf("%s: %v - skipping %s", j, err, bck) 315 } 316 continue 317 } 318 sz, err = j.jogBck() 319 size += sz 320 if err != nil && rerr == nil { 321 rerr = err 322 } 323 } 324 return size, rerr 325 } 326 327 func (j *clnJ) removeDeleted() (err error) { 328 err = j.mi.RemoveDeleted(j.String()) 329 if err != nil { 330 j.ini.Xaction.AddErr(err) 331 } 332 if cnt := j.p.jcnt.Dec(); cnt > 0 { 333 return 334 } 335 336 // last rm-deleted done: refresh cap now 337 var errCap error 338 j.p.cs.b, err, errCap = fs.CapRefresh(j.config, nil /*tcdf*/) 339 if err != nil { 340 j.ini.Xaction.Abort(err) 341 } else { 342 nlog.Infoln(j.ini.Xaction.Name(), "post-rm('deleted'):", errCap) 343 } 344 return 345 } 346 347 func (j *clnJ) jogBck() (size int64, err error) { 348 opts := &fs.WalkOpts{ 349 Mi: j.mi, 350 Bck: j.bck, 351 CTs: []string{fs.WorkfileType, fs.ObjectType, fs.ECSliceType, fs.ECMetaType}, 352 Callback: j.walk, 353 Sorted: false, 354 } 355 j.now = time.Now().UnixNano() 356 if err = fs.Walk(opts); err != nil { 357 return 358 } 359 size, err = j.rmLeftovers() 360 return 361 } 362 363 func (j *clnJ) visitCT(parsedFQN *fs.ParsedFQN, fqn string) { 364 switch parsedFQN.ContentType { 365 case fs.WorkfileType: 366 _, base := filepath.Split(fqn) 367 contentResolver := fs.CSM.Resolver(fs.WorkfileType) 368 _, old, ok := contentResolver.ParseUniqueFQN(base) 369 // workfiles: remove old or do nothing 370 if ok && old { 371 j.oldWork = append(j.oldWork, fqn) 372 } 373 case fs.ECSliceType: 374 // EC slices: 375 // - EC enabled: remove only slices with missing metafiles 376 // - EC disabled: remove all slices 377 ct, err := core.NewCTFromFQN(fqn, core.T.Bowner()) 378 if err != nil || !ct.Bck().Props.EC.Enabled { 379 j.oldWork = append(j.oldWork, fqn) 380 return 381 } 382 if err := ct.LoadFromFS(); err != nil { 383 return 384 } 385 // Saving a CT is not atomic: first it saves CT, then its metafile 386 // follows. Ignore just updated CTs to avoid processing incomplete data. 387 if ct.MtimeUnix()+int64(j.config.LRU.DontEvictTime) > j.now { 388 return 389 } 390 metaFQN := fs.CSM.Gen(ct, fs.ECMetaType, "") 391 if cos.Stat(metaFQN) != nil { 392 j.misplaced.ec = append(j.misplaced.ec, ct) 393 } 394 case fs.ECMetaType: 395 // EC metafiles: 396 // - EC enabled: remove only without corresponding slice or replica 397 // - EC disabled: remove all metafiles 398 ct, err := core.NewCTFromFQN(fqn, core.T.Bowner()) 399 if err != nil || !ct.Bck().Props.EC.Enabled { 400 j.oldWork = append(j.oldWork, fqn) 401 return 402 } 403 // Metafile is saved the last. If there is no corresponding replica or 404 // slice, it is safe to remove the stray metafile. 405 sliceCT := ct.Clone(fs.ECSliceType) 406 if cos.Stat(sliceCT.FQN()) == nil { 407 return 408 } 409 objCT := ct.Clone(fs.ObjectType) 410 if cos.Stat(objCT.FQN()) == nil { 411 return 412 } 413 j.oldWork = append(j.oldWork, fqn) 414 default: 415 debug.Assertf(false, "Unsupported content type: %s", parsedFQN.ContentType) 416 } 417 } 418 419 // TODO: add stats error counters (stats.ErrLmetaCorruptedCount, ...) 420 // TODO: revisit rm-ed byte counting 421 func (j *clnJ) visitObj(fqn string, lom *core.LOM) { 422 if err := lom.InitFQN(fqn, &j.bck); err != nil { 423 return 424 } 425 // handle load err 426 if errLoad := lom.Load(false /*cache it*/, false /*locked*/); errLoad != nil { 427 _, atime, err := ios.FinfoAtime(lom.FQN) 428 if err != nil { 429 if !os.IsNotExist(err) { 430 err = os.NewSyscallError("stat", err) 431 j.ini.Xaction.AddErr(err) 432 core.T.FSHC(err, lom.FQN) 433 } 434 return 435 } 436 // too early to remove anything 437 if atime+int64(j.config.LRU.DontEvictTime) < j.now { 438 return 439 } 440 if cmn.IsErrLmetaCorrupted(err) { 441 if err := cos.RemoveFile(lom.FQN); err != nil { 442 nlog.Errorf("%s: failed to rm MD-corrupted %s: %v (nested: %v)", j, lom, errLoad, err) 443 j.ini.Xaction.AddErr(err) 444 } else { 445 nlog.Errorf("%s: removed MD-corrupted %s: %v", j, lom, errLoad) 446 } 447 } else if cmn.IsErrLmetaNotFound(err) { 448 if err := cos.RemoveFile(lom.FQN); err != nil { 449 nlog.Errorf("%s: failed to rm no-MD %s: %v (nested: %v)", j, lom, errLoad, err) 450 j.ini.Xaction.AddErr(err) 451 } else { 452 nlog.Errorf("%s: removed no-MD %s: %v", j, lom, errLoad) 453 } 454 } 455 return 456 } 457 // too early 458 if lom.AtimeUnix()+int64(j.config.LRU.DontEvictTime) > j.now { 459 if cmn.Rom.FastV(5, cos.SmoduleSpace) { 460 nlog.Infof("too early for %s: atime %v", lom, lom.Atime()) 461 } 462 return 463 } 464 if lom.IsHRW() { 465 if lom.HasCopies() { 466 j.rmExtraCopies(lom) 467 } 468 return 469 } 470 if lom.IsCopy() { 471 return 472 } 473 if lom.ECEnabled() { 474 metaFQN := fs.CSM.Gen(lom, fs.ECMetaType, "") 475 if cos.Stat(metaFQN) != nil { 476 j.misplaced.ec = append(j.misplaced.ec, core.NewCTFromLOM(lom, fs.ObjectType)) 477 } 478 } else { 479 j.misplaced.loms = append(j.misplaced.loms, lom) 480 } 481 } 482 483 func (j *clnJ) rmExtraCopies(lom *core.LOM) { 484 if !lom.TryLock(true) { 485 return // must be busy 486 } 487 defer lom.Unlock(true) 488 // reload under lock and check atime - again 489 if err := lom.Load(false /*cache it*/, true /*locked*/); err != nil { 490 if !cos.IsNotExist(err, 0) { 491 j.ini.Xaction.AddErr(err) 492 } 493 return 494 } 495 if lom.AtimeUnix()+int64(j.config.LRU.DontEvictTime) > j.now { 496 return 497 } 498 if lom.IsCopy() { 499 return // extremely unlikely but ok 500 } 501 if _, err := lom.DelExtraCopies(); err != nil { 502 err = fmt.Errorf("%s: failed delete redundant copies of %s: %v", j, lom, err) 503 j.ini.Xaction.AddErr(err, 5, cos.SmoduleSpace) 504 } 505 } 506 507 func (j *clnJ) walk(fqn string, de fs.DirEntry) error { 508 var parsed fs.ParsedFQN 509 if de.IsDir() { 510 return nil 511 } 512 if err := j.yieldTerm(); err != nil { 513 return err 514 } 515 if _, err := core.ResolveFQN(fqn, &parsed); err != nil { 516 return nil 517 } 518 if parsed.ContentType != fs.ObjectType { 519 j.visitCT(&parsed, fqn) 520 } else { 521 lom := core.AllocLOM("") 522 j.visitObj(fqn, lom) 523 core.FreeLOM(lom) 524 } 525 return nil 526 } 527 528 // TODO: remove disfunctional files as soon as possible without adding them to slices. 529 func (j *clnJ) rmLeftovers() (size int64, err error) { 530 var ( 531 fevicted, bevicted int64 532 xcln = j.ini.Xaction 533 ) 534 if cmn.Rom.FastV(4, cos.SmoduleSpace) { 535 nlog.Infof("%s: num-old %d, misplaced (%d, ec=%d)", j, len(j.oldWork), len(j.misplaced.loms), len(j.misplaced.ec)) 536 } 537 538 // 1. rm older work 539 for _, workfqn := range j.oldWork { 540 finfo, erw := os.Stat(workfqn) 541 if erw == nil { 542 if err := cos.RemoveFile(workfqn); err != nil { 543 nlog.Errorf("%s: failed to rm old work %q: %v", j, workfqn, err) 544 } else { 545 size += finfo.Size() 546 fevicted++ 547 bevicted += finfo.Size() 548 if cmn.Rom.FastV(4, cos.SmoduleSpace) { 549 nlog.Infof("%s: rm old work %q, size=%d", j, workfqn, size) 550 } 551 } 552 } 553 } 554 j.oldWork = j.oldWork[:0] 555 556 // 2. rm misplaced 557 if len(j.misplaced.loms) > 0 && j.p.rmMisplaced() { 558 for _, mlom := range j.misplaced.loms { 559 var ( 560 fqn = mlom.FQN 561 removed bool 562 ) 563 lom := core.AllocLOM(mlom.ObjName) // yes placed 564 if lom.InitBck(&j.bck) != nil { 565 removed = os.Remove(fqn) == nil 566 } else if lom.FromFS() != nil { 567 removed = os.Remove(fqn) == nil 568 } else { 569 removed, _ = lom.DelExtraCopies(fqn) 570 } 571 core.FreeLOM(lom) 572 if removed { 573 fevicted++ 574 bevicted += mlom.SizeBytes(true /*not loaded*/) 575 if cmn.Rom.FastV(4, cos.SmoduleSpace) { 576 nlog.Infof("%s: rm misplaced %q, size=%d", j, mlom, mlom.SizeBytes(true /*not loaded*/)) 577 } 578 if err = j.yieldTerm(); err != nil { 579 return 580 } 581 } 582 } 583 } 584 j.misplaced.loms = j.misplaced.loms[:0] 585 586 // 3. rm EC slices and replicas that are still without correcponding metafile 587 for _, ct := range j.misplaced.ec { 588 metaFQN := fs.CSM.Gen(ct, fs.ECMetaType, "") 589 if cos.Stat(metaFQN) == nil { 590 continue 591 } 592 if os.Remove(ct.FQN()) == nil { 593 fevicted++ 594 bevicted += ct.SizeBytes() 595 if err = j.yieldTerm(); err != nil { 596 return 597 } 598 } 599 } 600 j.misplaced.ec = j.misplaced.ec[:0] 601 602 j.ini.StatsT.Add(stats.CleanupStoreSize, bevicted) // TODO -- FIXME 603 j.ini.StatsT.Add(stats.CleanupStoreCount, fevicted) 604 xcln.ObjsAdd(int(fevicted), bevicted) 605 return 606 } 607 608 func (j *clnJ) yieldTerm() error { 609 xcln := j.ini.Xaction 610 select { 611 case errCause := <-xcln.ChanAbort(): 612 return cmn.NewErrAborted(xcln.Name(), "", errCause) 613 case <-j.stopCh: 614 return cmn.NewErrAborted(xcln.Name(), "", nil) 615 default: 616 break 617 } 618 if xcln.Finished() { 619 return cmn.NewErrAborted(xcln.Name(), "", nil) 620 } 621 return nil 622 }