github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/space/cleanup.go (about)

     1  // Package space provides storage cleanup and eviction functionality (the latter based on the
     2  // least recently used cache replacement). It also serves as a built-in garbage-collection
     3  // mechanism for orphaned workfiles.
     4  /*
     5   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     6   */
     7  package space
     8  
     9  import (
    10  	"fmt"
    11  	"os"
    12  	"path/filepath"
    13  	"sync"
    14  	"time"
    15  
    16  	"github.com/NVIDIA/aistore/api/apc"
    17  	"github.com/NVIDIA/aistore/cmn"
    18  	"github.com/NVIDIA/aistore/cmn/atomic"
    19  	"github.com/NVIDIA/aistore/cmn/cos"
    20  	"github.com/NVIDIA/aistore/cmn/debug"
    21  	"github.com/NVIDIA/aistore/cmn/nlog"
    22  	"github.com/NVIDIA/aistore/core"
    23  	"github.com/NVIDIA/aistore/core/meta"
    24  	"github.com/NVIDIA/aistore/fs"
    25  	"github.com/NVIDIA/aistore/ios"
    26  	"github.com/NVIDIA/aistore/stats"
    27  	"github.com/NVIDIA/aistore/xact"
    28  	"github.com/NVIDIA/aistore/xact/xreg"
    29  )
    30  
type (
	// IniCln carries the parameters of a single cleanup run (see RunCleanup).
	IniCln struct {
		// NOTE: not read by RunCleanup in this file - the latter takes
		// the current config from cmn.GCO instead.
		Config  *cmn.Config
		// the xaction that accumulates errors and removed-objects counters,
		// and that RunCleanup finishes before returning
		Xaction *XactCln
		// receives stats.CleanupStoreSize and stats.CleanupStoreCount updates
		StatsT  stats.Tracker
		Buckets []cmn.Bck // optional list of specific buckets to cleanup
		// optional; when non-nil, RunCleanup calls Done() on it once: right after
		// the joggers have been started, or upon returning early
		WG      *sync.WaitGroup
	}
	// XactCln is the cleanup xaction; it embeds xact.Base and is driven
	// by RunCleanup (its own Run method only asserts).
	XactCln struct {
		xact.Base
	}
)
    43  
    44  // private
type (
	// parent (contains mpath joggers)
	clnP struct {
		// joggers' wait group: incremented in RunCleanup, released at the end of clnJ.run
		wg      sync.WaitGroup
		// all joggers, keyed by mountpath (as returned by fs.GetAvail)
		joggers map[string]*clnJ
		// private copy of the caller's IniCln
		ini     IniCln
		// capacity snapshots taken at different stages of the run
		cs      struct {
			a fs.CapStatus // initial
			b fs.CapStatus // capacity after removing 'deleted'
			c fs.CapStatus // upon finishing
		}
		// number of joggers that have not yet completed removeDeleted;
		// the jogger that brings it down to zero refreshes capacity (cs.b)
		jcnt atomic.Int32
	}
	// clnJ represents a single cleanup context and a single /jogger/
	// that traverses and evicts a single given mountpath.
	clnJ struct {
		// runtime
		// FQNs collected during the walk and removed by rmLeftovers
		// (old workfiles, stray EC slices and metafiles)
		oldWork   []string
		// collected during the walk and (conditionally) removed by rmLeftovers
		misplaced struct {
			loms []*core.LOM
			ec   []*core.CT // EC slices and replicas without corresponding metafiles
		}
		// the bucket that is currently being traversed
		bck cmn.Bck
		// time (UnixNano) when the current bucket walk has started
		now int64
		// init-time
		p       *clnP
		// points to the parent's copy (p.ini)
		ini     *IniCln
		// buffered (capacity 1); signaled by stop() and polled by yieldTerm
		stopCh  chan struct{}
		// same map as the parent's: all joggers including this one
		joggers map[string]*clnJ
		// the mountpath this jogger is responsible for
		mi      *fs.Mountpath
		config  *cmn.Config
	}
	// clnFactory creates (and holds) the cleanup xaction; see Start and Get.
	clnFactory struct {
		xreg.RenewBase
		xctn *XactCln
	}
)
    82  
    83  // interface guard
// compile-time checks: the build fails if clnFactory stops satisfying
// xreg.Renewable, or XactCln - core.Xact
var (
	_ xreg.Renewable = (*clnFactory)(nil)
	_ core.Xact      = (*XactCln)(nil)
)
    88  
    89  func (*XactCln) Run(*sync.WaitGroup) { debug.Assert(false) }
    90  
    91  func (r *XactCln) Snap() (snap *core.Snap) {
    92  	snap = &core.Snap{}
    93  	r.ToSnap(snap)
    94  
    95  	snap.IdleX = r.IsIdle()
    96  	return
    97  }
    98  
    99  ////////////////
   100  // clnFactory //
   101  ////////////////
   102  
   103  func (*clnFactory) New(args xreg.Args, _ *meta.Bck) xreg.Renewable {
   104  	return &clnFactory{RenewBase: xreg.RenewBase{Args: args}}
   105  }
   106  
   107  func (p *clnFactory) Start() error {
   108  	p.xctn = &XactCln{}
   109  	p.xctn.InitBase(p.UUID(), apc.ActStoreCleanup, nil)
   110  	return nil
   111  }
   112  
   113  func (*clnFactory) Kind() string     { return apc.ActStoreCleanup }
   114  func (p *clnFactory) Get() core.Xact { return p.xctn }
   115  
   116  func (*clnFactory) WhenPrevIsRunning(prevEntry xreg.Renewable) (wpr xreg.WPR, err error) {
   117  	return xreg.WprUse, cmn.NewErrXactUsePrev(prevEntry.Get().String())
   118  }
   119  
   120  func RunCleanup(ini *IniCln) fs.CapStatus {
   121  	var (
   122  		xcln           = ini.Xaction
   123  		config         = cmn.GCO.Get()
   124  		availablePaths = fs.GetAvail()
   125  		num            = len(availablePaths)
   126  		joggers        = make(map[string]*clnJ, num)
   127  		parent         = &clnP{joggers: joggers, ini: *ini}
   128  	)
   129  	defer func() {
   130  		if ini.WG != nil {
   131  			ini.WG.Done()
   132  		}
   133  	}()
   134  	if num == 0 {
   135  		xcln.AddErr(cmn.ErrNoMountpaths, 0)
   136  		xcln.Finish()
   137  		return fs.CapStatus{}
   138  	}
   139  	for mpath, mi := range availablePaths {
   140  		joggers[mpath] = &clnJ{
   141  			oldWork: make([]string, 0, 64),
   142  			stopCh:  make(chan struct{}, 1),
   143  			mi:      mi,
   144  			config:  config,
   145  			ini:     &parent.ini,
   146  			p:       parent,
   147  		}
   148  		joggers[mpath].misplaced.loms = make([]*core.LOM, 0, 64)
   149  		joggers[mpath].misplaced.ec = make([]*core.CT, 0, 64)
   150  	}
   151  	parent.jcnt.Store(int32(len(joggers)))
   152  	providers := apc.Providers.ToSlice()
   153  	for _, j := range joggers {
   154  		parent.wg.Add(1)
   155  		j.joggers = joggers
   156  		go j.run(providers)
   157  	}
   158  
   159  	parent.cs.a = fs.Cap()
   160  	nlog.Infoln(xcln.Name(), "started: ", xcln, parent.cs.a.String())
   161  	if ini.WG != nil {
   162  		ini.WG.Done()
   163  		ini.WG = nil
   164  	}
   165  	parent.wg.Wait()
   166  
   167  	for _, j := range joggers {
   168  		j.stop()
   169  	}
   170  
   171  	var err, errCap error
   172  	parent.cs.c, err, errCap = fs.CapRefresh(config, nil /*tcdf*/)
   173  	if err != nil {
   174  		xcln.AddErr(err)
   175  	}
   176  	if errCap != nil {
   177  		xcln.AddErr(errCap)
   178  	}
   179  	xcln.Finish()
   180  	nlog.Infoln(xcln.Name(), "finished:", errCap)
   181  
   182  	return parent.cs.c
   183  }
   184  
   185  func (p *clnP) rmMisplaced() bool {
   186  	var (
   187  		g = xreg.GetRebMarked()
   188  		l = xreg.GetResilverMarked()
   189  	)
   190  	if g.Xact == nil && l.Xact == nil && !g.Interrupted && !g.Restarted && !l.Interrupted {
   191  		return true
   192  	}
   193  
   194  	// log
   195  	var warn, info string
   196  	if p.cs.a.Err() != nil {
   197  		warn = fmt.Sprintf("%s: %s but not removing misplaced/obsolete copies: ", p.ini.Xaction, p.cs.a.String())
   198  	} else {
   199  		warn = fmt.Sprintf("%s: not removing misplaced/obsolete copies: ", p.ini.Xaction)
   200  	}
   201  	switch {
   202  	case g.Xact != nil:
   203  		info = g.Xact.String() + " is running"
   204  	case g.Interrupted:
   205  		info = "rebalance interrupted"
   206  	case g.Restarted:
   207  		info = "node restarted"
   208  	case l.Xact != nil:
   209  		info = l.Xact.String() + " is running"
   210  	case l.Interrupted:
   211  		info = "resilver interrupted"
   212  	}
   213  	if p.cs.a.Err() != nil {
   214  		nlog.Errorln(warn + info)
   215  	} else {
   216  		nlog.Warningln(warn + info)
   217  	}
   218  	return false
   219  }
   220  
   221  //////////
   222  // clnJ //
   223  //////////
   224  
   225  // mountpath cleanup j
   226  
   227  func (j *clnJ) String() string {
   228  	return fmt.Sprintf("%s: jog-%s", j.ini.Xaction, j.mi)
   229  }
   230  
   231  func (j *clnJ) stop() { j.stopCh <- struct{}{} }
   232  
   233  func (j *clnJ) run(providers []string) {
   234  	const f = "%s: freed space %s (not including removed 'deleted')"
   235  	var (
   236  		size     int64
   237  		err, erm error
   238  	)
   239  	// globally
   240  	erm = j.removeDeleted()
   241  	if erm != nil {
   242  		nlog.Errorln(erm)
   243  	}
   244  
   245  	// traverse
   246  	if len(j.ini.Buckets) != 0 {
   247  		size, err = j.jogBcks(j.ini.Buckets)
   248  	} else {
   249  		size, err = j.jog(providers)
   250  	}
   251  	if err == nil {
   252  		err = erm
   253  	}
   254  	if err == nil {
   255  		if size != 0 {
   256  			nlog.Infof(f, j, cos.ToSizeIEC(size, 1))
   257  		}
   258  	} else {
   259  		nlog.Errorf(f+", err: %v", j, cos.ToSizeIEC(size, 1), err)
   260  	}
   261  	j.p.wg.Done()
   262  }
   263  
   264  func (j *clnJ) jog(providers []string) (size int64, rerr error) {
   265  	for _, provider := range providers { // for each provider (NOTE: ordering is random)
   266  		var (
   267  			sz   int64
   268  			bcks []cmn.Bck
   269  			err  error
   270  			opts = fs.WalkOpts{Mi: j.mi, Bck: cmn.Bck{Provider: provider, Ns: cmn.NsGlobal}}
   271  		)
   272  		if bcks, err = fs.AllMpathBcks(&opts); err != nil {
   273  			nlog.Errorln(err)
   274  			if rerr == nil {
   275  				rerr = err
   276  			}
   277  			continue
   278  		}
   279  		if len(bcks) == 0 {
   280  			continue
   281  		}
   282  		sz, err = j.jogBcks(bcks)
   283  		size += sz
   284  		if err != nil && rerr == nil {
   285  			rerr = err
   286  		}
   287  	}
   288  	return
   289  }
   290  
   291  func (j *clnJ) jogBcks(bcks []cmn.Bck) (size int64, rerr error) {
   292  	bowner := core.T.Bowner()
   293  	for i := range bcks { // for each bucket under a given provider
   294  		var (
   295  			err error
   296  			sz  int64
   297  			bck = bcks[i]
   298  			b   = meta.CloneBck(&bck)
   299  		)
   300  		j.bck = bck
   301  		err = b.Init(bowner)
   302  		if err != nil {
   303  			if cmn.IsErrBckNotFound(err) || cmn.IsErrRemoteBckNotFound(err) {
   304  				const act = "delete non-existing"
   305  				if err = fs.DestroyBucket(act, &bck, 0 /*unknown BID*/); err == nil {
   306  					nlog.Infof("%s: %s %s", j, act, bck)
   307  				} else {
   308  					j.ini.Xaction.AddErr(err)
   309  					nlog.Errorf("%s %s: %v - skipping", j, act, err)
   310  				}
   311  			} else {
   312  				// TODO: config option to scrub `fs.AllMpathBcks` buckets
   313  				j.ini.Xaction.AddErr(err)
   314  				nlog.Errorf("%s: %v - skipping %s", j, err, bck)
   315  			}
   316  			continue
   317  		}
   318  		sz, err = j.jogBck()
   319  		size += sz
   320  		if err != nil && rerr == nil {
   321  			rerr = err
   322  		}
   323  	}
   324  	return size, rerr
   325  }
   326  
   327  func (j *clnJ) removeDeleted() (err error) {
   328  	err = j.mi.RemoveDeleted(j.String())
   329  	if err != nil {
   330  		j.ini.Xaction.AddErr(err)
   331  	}
   332  	if cnt := j.p.jcnt.Dec(); cnt > 0 {
   333  		return
   334  	}
   335  
   336  	// last rm-deleted done: refresh cap now
   337  	var errCap error
   338  	j.p.cs.b, err, errCap = fs.CapRefresh(j.config, nil /*tcdf*/)
   339  	if err != nil {
   340  		j.ini.Xaction.Abort(err)
   341  	} else {
   342  		nlog.Infoln(j.ini.Xaction.Name(), "post-rm('deleted'):", errCap)
   343  	}
   344  	return
   345  }
   346  
   347  func (j *clnJ) jogBck() (size int64, err error) {
   348  	opts := &fs.WalkOpts{
   349  		Mi:       j.mi,
   350  		Bck:      j.bck,
   351  		CTs:      []string{fs.WorkfileType, fs.ObjectType, fs.ECSliceType, fs.ECMetaType},
   352  		Callback: j.walk,
   353  		Sorted:   false,
   354  	}
   355  	j.now = time.Now().UnixNano()
   356  	if err = fs.Walk(opts); err != nil {
   357  		return
   358  	}
   359  	size, err = j.rmLeftovers()
   360  	return
   361  }
   362  
   363  func (j *clnJ) visitCT(parsedFQN *fs.ParsedFQN, fqn string) {
   364  	switch parsedFQN.ContentType {
   365  	case fs.WorkfileType:
   366  		_, base := filepath.Split(fqn)
   367  		contentResolver := fs.CSM.Resolver(fs.WorkfileType)
   368  		_, old, ok := contentResolver.ParseUniqueFQN(base)
   369  		// workfiles: remove old or do nothing
   370  		if ok && old {
   371  			j.oldWork = append(j.oldWork, fqn)
   372  		}
   373  	case fs.ECSliceType:
   374  		// EC slices:
   375  		// - EC enabled: remove only slices with missing metafiles
   376  		// - EC disabled: remove all slices
   377  		ct, err := core.NewCTFromFQN(fqn, core.T.Bowner())
   378  		if err != nil || !ct.Bck().Props.EC.Enabled {
   379  			j.oldWork = append(j.oldWork, fqn)
   380  			return
   381  		}
   382  		if err := ct.LoadFromFS(); err != nil {
   383  			return
   384  		}
   385  		// Saving a CT is not atomic: first it saves CT, then its metafile
   386  		// follows. Ignore just updated CTs to avoid processing incomplete data.
   387  		if ct.MtimeUnix()+int64(j.config.LRU.DontEvictTime) > j.now {
   388  			return
   389  		}
   390  		metaFQN := fs.CSM.Gen(ct, fs.ECMetaType, "")
   391  		if cos.Stat(metaFQN) != nil {
   392  			j.misplaced.ec = append(j.misplaced.ec, ct)
   393  		}
   394  	case fs.ECMetaType:
   395  		// EC metafiles:
   396  		// - EC enabled: remove only without corresponding slice or replica
   397  		// - EC disabled: remove all metafiles
   398  		ct, err := core.NewCTFromFQN(fqn, core.T.Bowner())
   399  		if err != nil || !ct.Bck().Props.EC.Enabled {
   400  			j.oldWork = append(j.oldWork, fqn)
   401  			return
   402  		}
   403  		// Metafile is saved the last. If there is no corresponding replica or
   404  		// slice, it is safe to remove the stray metafile.
   405  		sliceCT := ct.Clone(fs.ECSliceType)
   406  		if cos.Stat(sliceCT.FQN()) == nil {
   407  			return
   408  		}
   409  		objCT := ct.Clone(fs.ObjectType)
   410  		if cos.Stat(objCT.FQN()) == nil {
   411  			return
   412  		}
   413  		j.oldWork = append(j.oldWork, fqn)
   414  	default:
   415  		debug.Assertf(false, "Unsupported content type: %s", parsedFQN.ContentType)
   416  	}
   417  }
   418  
   419  // TODO: add stats error counters (stats.ErrLmetaCorruptedCount, ...)
   420  // TODO: revisit rm-ed byte counting
   421  func (j *clnJ) visitObj(fqn string, lom *core.LOM) {
   422  	if err := lom.InitFQN(fqn, &j.bck); err != nil {
   423  		return
   424  	}
   425  	// handle load err
   426  	if errLoad := lom.Load(false /*cache it*/, false /*locked*/); errLoad != nil {
   427  		_, atime, err := ios.FinfoAtime(lom.FQN)
   428  		if err != nil {
   429  			if !os.IsNotExist(err) {
   430  				err = os.NewSyscallError("stat", err)
   431  				j.ini.Xaction.AddErr(err)
   432  				core.T.FSHC(err, lom.FQN)
   433  			}
   434  			return
   435  		}
   436  		// too early to remove anything
   437  		if atime+int64(j.config.LRU.DontEvictTime) < j.now {
   438  			return
   439  		}
   440  		if cmn.IsErrLmetaCorrupted(err) {
   441  			if err := cos.RemoveFile(lom.FQN); err != nil {
   442  				nlog.Errorf("%s: failed to rm MD-corrupted %s: %v (nested: %v)", j, lom, errLoad, err)
   443  				j.ini.Xaction.AddErr(err)
   444  			} else {
   445  				nlog.Errorf("%s: removed MD-corrupted %s: %v", j, lom, errLoad)
   446  			}
   447  		} else if cmn.IsErrLmetaNotFound(err) {
   448  			if err := cos.RemoveFile(lom.FQN); err != nil {
   449  				nlog.Errorf("%s: failed to rm no-MD %s: %v (nested: %v)", j, lom, errLoad, err)
   450  				j.ini.Xaction.AddErr(err)
   451  			} else {
   452  				nlog.Errorf("%s: removed no-MD %s: %v", j, lom, errLoad)
   453  			}
   454  		}
   455  		return
   456  	}
   457  	// too early
   458  	if lom.AtimeUnix()+int64(j.config.LRU.DontEvictTime) > j.now {
   459  		if cmn.Rom.FastV(5, cos.SmoduleSpace) {
   460  			nlog.Infof("too early for %s: atime %v", lom, lom.Atime())
   461  		}
   462  		return
   463  	}
   464  	if lom.IsHRW() {
   465  		if lom.HasCopies() {
   466  			j.rmExtraCopies(lom)
   467  		}
   468  		return
   469  	}
   470  	if lom.IsCopy() {
   471  		return
   472  	}
   473  	if lom.ECEnabled() {
   474  		metaFQN := fs.CSM.Gen(lom, fs.ECMetaType, "")
   475  		if cos.Stat(metaFQN) != nil {
   476  			j.misplaced.ec = append(j.misplaced.ec, core.NewCTFromLOM(lom, fs.ObjectType))
   477  		}
   478  	} else {
   479  		j.misplaced.loms = append(j.misplaced.loms, lom)
   480  	}
   481  }
   482  
   483  func (j *clnJ) rmExtraCopies(lom *core.LOM) {
   484  	if !lom.TryLock(true) {
   485  		return // must be busy
   486  	}
   487  	defer lom.Unlock(true)
   488  	// reload under lock and check atime - again
   489  	if err := lom.Load(false /*cache it*/, true /*locked*/); err != nil {
   490  		if !cos.IsNotExist(err, 0) {
   491  			j.ini.Xaction.AddErr(err)
   492  		}
   493  		return
   494  	}
   495  	if lom.AtimeUnix()+int64(j.config.LRU.DontEvictTime) > j.now {
   496  		return
   497  	}
   498  	if lom.IsCopy() {
   499  		return // extremely unlikely but ok
   500  	}
   501  	if _, err := lom.DelExtraCopies(); err != nil {
   502  		err = fmt.Errorf("%s: failed delete redundant copies of %s: %v", j, lom, err)
   503  		j.ini.Xaction.AddErr(err, 5, cos.SmoduleSpace)
   504  	}
   505  }
   506  
   507  func (j *clnJ) walk(fqn string, de fs.DirEntry) error {
   508  	var parsed fs.ParsedFQN
   509  	if de.IsDir() {
   510  		return nil
   511  	}
   512  	if err := j.yieldTerm(); err != nil {
   513  		return err
   514  	}
   515  	if _, err := core.ResolveFQN(fqn, &parsed); err != nil {
   516  		return nil
   517  	}
   518  	if parsed.ContentType != fs.ObjectType {
   519  		j.visitCT(&parsed, fqn)
   520  	} else {
   521  		lom := core.AllocLOM("")
   522  		j.visitObj(fqn, lom)
   523  		core.FreeLOM(lom)
   524  	}
   525  	return nil
   526  }
   527  
   528  // TODO: remove disfunctional files as soon as possible without adding them to slices.
   529  func (j *clnJ) rmLeftovers() (size int64, err error) {
   530  	var (
   531  		fevicted, bevicted int64
   532  		xcln               = j.ini.Xaction
   533  	)
   534  	if cmn.Rom.FastV(4, cos.SmoduleSpace) {
   535  		nlog.Infof("%s: num-old %d, misplaced (%d, ec=%d)", j, len(j.oldWork), len(j.misplaced.loms), len(j.misplaced.ec))
   536  	}
   537  
   538  	// 1. rm older work
   539  	for _, workfqn := range j.oldWork {
   540  		finfo, erw := os.Stat(workfqn)
   541  		if erw == nil {
   542  			if err := cos.RemoveFile(workfqn); err != nil {
   543  				nlog.Errorf("%s: failed to rm old work %q: %v", j, workfqn, err)
   544  			} else {
   545  				size += finfo.Size()
   546  				fevicted++
   547  				bevicted += finfo.Size()
   548  				if cmn.Rom.FastV(4, cos.SmoduleSpace) {
   549  					nlog.Infof("%s: rm old work %q, size=%d", j, workfqn, size)
   550  				}
   551  			}
   552  		}
   553  	}
   554  	j.oldWork = j.oldWork[:0]
   555  
   556  	// 2. rm misplaced
   557  	if len(j.misplaced.loms) > 0 && j.p.rmMisplaced() {
   558  		for _, mlom := range j.misplaced.loms {
   559  			var (
   560  				fqn     = mlom.FQN
   561  				removed bool
   562  			)
   563  			lom := core.AllocLOM(mlom.ObjName) // yes placed
   564  			if lom.InitBck(&j.bck) != nil {
   565  				removed = os.Remove(fqn) == nil
   566  			} else if lom.FromFS() != nil {
   567  				removed = os.Remove(fqn) == nil
   568  			} else {
   569  				removed, _ = lom.DelExtraCopies(fqn)
   570  			}
   571  			core.FreeLOM(lom)
   572  			if removed {
   573  				fevicted++
   574  				bevicted += mlom.SizeBytes(true /*not loaded*/)
   575  				if cmn.Rom.FastV(4, cos.SmoduleSpace) {
   576  					nlog.Infof("%s: rm misplaced %q, size=%d", j, mlom, mlom.SizeBytes(true /*not loaded*/))
   577  				}
   578  				if err = j.yieldTerm(); err != nil {
   579  					return
   580  				}
   581  			}
   582  		}
   583  	}
   584  	j.misplaced.loms = j.misplaced.loms[:0]
   585  
   586  	// 3. rm EC slices and replicas that are still without correcponding metafile
   587  	for _, ct := range j.misplaced.ec {
   588  		metaFQN := fs.CSM.Gen(ct, fs.ECMetaType, "")
   589  		if cos.Stat(metaFQN) == nil {
   590  			continue
   591  		}
   592  		if os.Remove(ct.FQN()) == nil {
   593  			fevicted++
   594  			bevicted += ct.SizeBytes()
   595  			if err = j.yieldTerm(); err != nil {
   596  				return
   597  			}
   598  		}
   599  	}
   600  	j.misplaced.ec = j.misplaced.ec[:0]
   601  
   602  	j.ini.StatsT.Add(stats.CleanupStoreSize, bevicted) // TODO -- FIXME
   603  	j.ini.StatsT.Add(stats.CleanupStoreCount, fevicted)
   604  	xcln.ObjsAdd(int(fevicted), bevicted)
   605  	return
   606  }
   607  
   608  func (j *clnJ) yieldTerm() error {
   609  	xcln := j.ini.Xaction
   610  	select {
   611  	case errCause := <-xcln.ChanAbort():
   612  		return cmn.NewErrAborted(xcln.Name(), "", errCause)
   613  	case <-j.stopCh:
   614  		return cmn.NewErrAborted(xcln.Name(), "", nil)
   615  	default:
   616  		break
   617  	}
   618  	if xcln.Finished() {
   619  		return cmn.NewErrAborted(xcln.Name(), "", nil)
   620  	}
   621  	return nil
   622  }