github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/res/resilver.go (about)

     1  // Package res provides local volume resilvering upon mountpath-attach and similar
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package res
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  	"path/filepath"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/NVIDIA/aistore/api/apc"
    15  	"github.com/NVIDIA/aistore/cmn"
    16  	"github.com/NVIDIA/aistore/cmn/atomic"
    17  	"github.com/NVIDIA/aistore/cmn/cos"
    18  	"github.com/NVIDIA/aistore/cmn/debug"
    19  	"github.com/NVIDIA/aistore/cmn/fname"
    20  	"github.com/NVIDIA/aistore/cmn/mono"
    21  	"github.com/NVIDIA/aistore/cmn/nlog"
    22  	"github.com/NVIDIA/aistore/core"
    23  	"github.com/NVIDIA/aistore/fs"
    24  	"github.com/NVIDIA/aistore/fs/mpather"
    25  	"github.com/NVIDIA/aistore/memsys"
    26  	"github.com/NVIDIA/aistore/xact"
    27  	"github.com/NVIDIA/aistore/xact/xreg"
    28  	"github.com/NVIDIA/aistore/xact/xs"
    29  )
    30  
// timedDuration is the base time window used by (*Res).IsActive: resilver is
// reported active for `multiplier * timedDuration` after it begins, and for
// another `timedDuration` after it ends.
const timedDuration = 4 * time.Second // see also: timedDuration in tgtgfn.go
    32  
    33  type (
    34  	Res struct {
    35  		// last or current resilver's time interval
    36  		begin atomic.Int64
    37  		end   atomic.Int64
    38  	}
    39  	Args struct {
    40  		UUID              string
    41  		Notif             *xact.NotifXact
    42  		Rmi               *fs.Mountpath
    43  		Action            string
    44  		PostDD            func(rmi *fs.Mountpath, action string, xres *xs.Resilver, err error)
    45  		SkipGlobMisplaced bool
    46  		SingleRmiJogger   bool
    47  	}
    48  	joggerCtx struct {
    49  		xres   *xs.Resilver
    50  		config *cmn.Config
    51  	}
    52  )
    53  
    54  func New() *Res { return &Res{} }
    55  
    56  func (res *Res) IsActive(multiplier int64) (yes bool) {
    57  	begin := res.begin.Load()
    58  	if begin == 0 {
    59  		return
    60  	}
    61  	now := mono.NanoTime()
    62  	if now-begin < multiplier*int64(timedDuration) {
    63  		yes = true
    64  	} else {
    65  		end := res.end.Load()
    66  		yes = end == 0 || time.Duration(now-end) < timedDuration
    67  	}
    68  	return
    69  }
    70  
    71  func (res *Res) _begin() {
    72  	res.begin.Store(mono.NanoTime())
    73  	res.end.Store(0)
    74  }
    75  
    76  func (res *Res) _end() {
    77  	res.end.Store(mono.NanoTime())
    78  }
    79  
    80  func (res *Res) RunResilver(args Args) {
    81  	res._begin()
    82  	defer res._end()
    83  	if fatalErr, writeErr := fs.PersistMarker(fname.ResilverMarker); fatalErr != nil || writeErr != nil {
    84  		nlog.Errorf("FATAL: %v, WRITE: %v", fatalErr, writeErr)
    85  		return
    86  	}
    87  	availablePaths, _ := fs.Get()
    88  	if len(availablePaths) < 1 {
    89  		nlog.Errorln(cmn.ErrNoMountpaths)
    90  		return
    91  	}
    92  	xres := xreg.RenewResilver(args.UUID).(*xs.Resilver)
    93  	if args.Notif != nil {
    94  		args.Notif.Xact = xres
    95  		xres.AddNotif(args.Notif)
    96  	}
    97  
    98  	// jogger group
    99  	var (
   100  		jg        *mpather.Jgroup
   101  		slab, err = core.T.PageMM().GetSlab(memsys.MaxPageSlabSize)
   102  		config    = cmn.GCO.Get()
   103  		jctx      = &joggerCtx{xres: xres, config: config}
   104  
   105  		opts = &mpather.JgroupOpts{
   106  			CTs:                   []string{fs.ObjectType, fs.ECSliceType},
   107  			VisitObj:              jctx.visitObj,
   108  			VisitCT:               jctx.visitCT,
   109  			Slab:                  slab,
   110  			SkipGloballyMisplaced: args.SkipGlobMisplaced,
   111  		}
   112  	)
   113  	debug.AssertNoErr(err)
   114  	debug.Assert(args.PostDD == nil || (args.Action == apc.ActMountpathDetach || args.Action == apc.ActMountpathDisable))
   115  
   116  	if args.SingleRmiJogger {
   117  		jg = mpather.NewJoggerGroup(opts, config, args.Rmi.Path)
   118  		nlog.Infof("%s, action %q, jogger->(%q)", xres.Name(), args.Action, args.Rmi)
   119  	} else {
   120  		jg = mpather.NewJoggerGroup(opts, config, "")
   121  		if args.Rmi != nil {
   122  			nlog.Infof("%s, action %q, rmi %s, num %d", xres.Name(), args.Action, args.Rmi, jg.Num())
   123  		} else {
   124  			nlog.Infof("%s, num %d", xres.Name(), jg.Num())
   125  		}
   126  	}
   127  
   128  	// run and block waiting
   129  	res.end.Store(0)
   130  	jg.Run()
   131  	err = wait(jg, xres)
   132  	if err != nil {
   133  		xres.AddErr(err)
   134  	}
   135  	// callback to, finally, detach-disable
   136  	if args.PostDD != nil {
   137  		args.PostDD(args.Rmi, args.Action, xres, err)
   138  	}
   139  	xres.Finish()
   140  }
   141  
   142  // Wait for an abort or for resilvering joggers to finish.
   143  func wait(jg *mpather.Jgroup, xres *xs.Resilver) (err error) {
   144  	for {
   145  		select {
   146  		case errCause := <-xres.ChanAbort():
   147  			if err = jg.Stop(); err != nil {
   148  				xres.AddErr(err, 0)
   149  			} else {
   150  				nlog.Infoln(core.T.String()+":", xres.Name(), "aborted, cause:", errCause)
   151  			}
   152  			return cmn.NewErrAborted(xres.Name(), "", errCause)
   153  		case <-jg.ListenFinished():
   154  			if err = fs.RemoveMarker(fname.ResilverMarker); err == nil {
   155  				nlog.Infoln(core.T.String()+":", xres.Name(), "removed marker ok")
   156  			}
   157  			return
   158  		}
   159  	}
   160  }
   161  
   162  // Copies a slice and its metafile (if exists) to the current mpath. At the
   163  // end does proper cleanup: removes ether source files(on success), or
   164  // destination files(on copy failure)
   165  func (jg *joggerCtx) _mvSlice(ct *core.CT, buf []byte) {
   166  	uname := ct.Bck().MakeUname(ct.ObjectName())
   167  	destMpath, _, err := fs.Hrw(uname)
   168  	if err != nil {
   169  		jg.xres.AddErr(err)
   170  		nlog.Infoln("Warning:", err)
   171  		return
   172  	}
   173  	if destMpath.Path == ct.Mountpath().Path {
   174  		return
   175  	}
   176  
   177  	destFQN := destMpath.MakePathFQN(ct.Bucket(), fs.ECSliceType, ct.ObjectName())
   178  	srcMetaFQN, destMetaFQN, err := _moveECMeta(ct, ct.Mountpath(), destMpath, buf)
   179  	if err != nil {
   180  		jg.xres.AddErr(err)
   181  		return
   182  	}
   183  	// Slice without metafile - skip it as unusable, let LRU clean it up
   184  	if srcMetaFQN == "" {
   185  		return
   186  	}
   187  	if cmn.Rom.FastV(4, cos.SmoduleReb) {
   188  		nlog.Infof("%s: moving %q -> %q", core.T, ct.FQN(), destFQN)
   189  	}
   190  	if _, _, err = cos.CopyFile(ct.FQN(), destFQN, buf, cos.ChecksumNone); err != nil {
   191  		errV := fmt.Errorf("failed to copy %q -> %q: %v. Rolling back", ct.FQN(), destFQN, err)
   192  		jg.xres.AddErr(errV, 0)
   193  		if err = os.Remove(destMetaFQN); err != nil {
   194  			errV := fmt.Errorf("failed to cleanup metafile %q: %v", destMetaFQN, err)
   195  			nlog.Infoln("Warning:", errV)
   196  			jg.xres.AddErr(errV)
   197  		}
   198  	}
   199  	errMeta := os.Remove(srcMetaFQN)
   200  	errSlice := os.Remove(ct.FQN())
   201  	if errMeta != nil || errSlice != nil {
   202  		nlog.Warningf("Failed to cleanup %q: %v, %v", ct.FQN(), errSlice, errMeta)
   203  	}
   204  }
   205  
   206  // Copies EC metafile to correct mpath. It returns FQNs of the source and
   207  // destination for a caller to do proper cleanup. Empty values means: either
   208  // the source FQN does not exist(err==nil), or copying failed
   209  func _moveECMeta(ct *core.CT, srcMpath, dstMpath *fs.Mountpath, buf []byte) (string, string, error) {
   210  	src := srcMpath.MakePathFQN(ct.Bucket(), fs.ECMetaType, ct.ObjectName())
   211  	// If metafile does not exist it may mean that EC has not processed the
   212  	// object yet (e.g, EC was enabled after the bucket was filled), or
   213  	// the metafile has gone
   214  	if err := cos.Stat(src); os.IsNotExist(err) {
   215  		return "", "", nil
   216  	}
   217  	dst := dstMpath.MakePathFQN(ct.Bucket(), fs.ECMetaType, ct.ObjectName())
   218  	_, _, err := cos.CopyFile(src, dst, buf, cos.ChecksumNone)
   219  	if err == nil {
   220  		return src, dst, nil
   221  	}
   222  	if os.IsNotExist(err) {
   223  		err = nil
   224  	}
   225  	return "", "", err
   226  }
   227  
   228  // TODO: revisit EC bits and check for OOS preemptively
   229  // NOTE: not deleting extra copies - delegating to `storage cleanup`
   230  func (jg *joggerCtx) visitObj(lom *core.LOM, buf []byte) (errHrw error) {
   231  	const maxRetries = 3
   232  	var (
   233  		orig   = lom
   234  		hlom   *core.LOM
   235  		xname  = jg.xres.Name()
   236  		size   int64
   237  		copied bool
   238  	)
   239  	if !lom.TryLock(true) { // NOTE: skipping busy
   240  		time.Sleep(time.Second >> 1)
   241  		if !lom.TryLock(true) {
   242  			return
   243  		}
   244  	}
   245  	// cleanup
   246  	defer func() {
   247  		lom = orig
   248  		lom.Unlock(true)
   249  		if copied && errHrw == nil {
   250  			jg.xres.ObjsAdd(1, size)
   251  		}
   252  	}()
   253  
   254  	// 1. fix EC metafile
   255  	var metaOldPath, metaNewPath string
   256  	if !lom.IsHRW() && lom.ECEnabled() {
   257  		var parsed fs.ParsedFQN
   258  		_, err := core.ResolveFQN(lom.HrwFQN, &parsed)
   259  		if err != nil {
   260  			nlog.Warningf("%s: %s %v", xname, lom, err)
   261  			return nil
   262  		}
   263  		ct := core.NewCTFromLOM(lom, fs.ObjectType)
   264  		// copy metafile
   265  		metaOldPath, metaNewPath, err = _moveECMeta(ct, lom.Mountpath(), parsed.Mountpath, buf)
   266  		if err != nil {
   267  			nlog.Warningf("%s: failed to copy EC metafile %s %q -> %q: %v", xname, lom, lom.Mountpath().Path,
   268  				parsed.Mountpath.Path, err)
   269  			return nil
   270  		}
   271  	}
   272  
   273  	if err := lom.Load(false /*cache it*/, true /*locked*/); err != nil {
   274  		return nil
   275  	}
   276  	size = lom.SizeBytes()
   277  	// 2. fix hrw location; fail and subsequently abort if unsuccessful
   278  	var (
   279  		retries   int
   280  		mi, isHrw = lom.ToMpath()
   281  	)
   282  	if mi == nil {
   283  		goto ret // nothing to do
   284  	}
   285  redo:
   286  	if isHrw {
   287  		// cannot have it associated with a non-hrw mp; TODO: !lom.WritePolicy().IsImmediate()
   288  		lom.Uncache()
   289  
   290  		hlom, errHrw = jg.fixHrw(lom, mi, buf)
   291  		if errHrw != nil {
   292  			if !os.IsNotExist(errHrw) && !strings.Contains(errHrw.Error(), "does not exist") {
   293  				errV := fmt.Errorf("%s: failed to restore %s, errHrw: %v", xname, lom, errHrw)
   294  				jg.xres.AddErr(errV, 0)
   295  			}
   296  			// EC cleanup and return
   297  			if metaNewPath != "" {
   298  				if errHrw = os.Remove(metaNewPath); errHrw != nil {
   299  					errV := fmt.Errorf("%s: nested (%s %s: %v)", xname, lom, metaNewPath, errHrw)
   300  					nlog.Infoln("Warning:", errV)
   301  					jg.xres.AddErr(errV, 0)
   302  				}
   303  			}
   304  			return
   305  		}
   306  		lom = hlom
   307  		copied = true
   308  	}
   309  
   310  	// 3. fix copies
   311  	for {
   312  		mi, isHrw := lom.ToMpath()
   313  		if mi == nil {
   314  			break
   315  		}
   316  		if isHrw {
   317  			// redo hlom in an unlikely event
   318  			retries++
   319  			if retries > maxRetries {
   320  				hmi := "???"
   321  				if hlom != nil && hlom.Mountpath() != nil {
   322  					hmi = hlom.Mountpath().String()
   323  				}
   324  				errHrw = fmt.Errorf("%s: hrw mountpaths keep changing (%s(%s) => %s => %s ...)",
   325  					xname, orig, orig.Mountpath(), hmi, mi)
   326  				jg.xres.AddErr(errHrw, 0)
   327  				return
   328  			}
   329  			copied = false
   330  			lom, hlom = orig, nil
   331  			time.Sleep(cmn.Rom.CplaneOperation() / 2)
   332  			goto redo
   333  		}
   334  		err := lom.Copy(mi, buf)
   335  		if err == nil {
   336  			copied = true
   337  			continue
   338  		}
   339  		if cos.IsErrOOS(err) {
   340  			errV := fmt.Errorf("%s: %s OOS, err: %w", core.T, mi, err)
   341  			jg.xres.AddErr(errV, 0)
   342  			err = cmn.NewErrAborted(xname, "", errV)
   343  		} else if !os.IsNotExist(err) && !strings.Contains(err.Error(), "does not exist") {
   344  			errV := fmt.Errorf("%s: failed to copy %s to %s, err: %w", xname, lom, mi, err)
   345  			nlog.Infoln("Warning:", errV)
   346  			jg.xres.AddErr(errV)
   347  		}
   348  		break
   349  	}
   350  ret:
   351  	// EC: remove old metafile
   352  	if metaOldPath != "" {
   353  		if err := os.Remove(metaOldPath); err != nil {
   354  			nlog.Warningf("%s: failed to cleanup %s old metafile %q: %v", xname, lom, metaOldPath, err)
   355  		}
   356  	}
   357  	return nil
   358  }
   359  
   360  func (*joggerCtx) fixHrw(lom *core.LOM, mi *fs.Mountpath, buf []byte) (hlom *core.LOM, err error) {
   361  	if err = lom.Copy(mi, buf); err != nil {
   362  		return
   363  	}
   364  	hrwFQN := mi.MakePathFQN(lom.Bucket(), fs.ObjectType, lom.ObjName)
   365  	hlom = &core.LOM{}
   366  	if err = hlom.InitFQN(hrwFQN, lom.Bucket()); err != nil {
   367  		return
   368  	}
   369  	debug.Assert(hlom.Mountpath().Path == mi.Path)
   370  
   371  	// reload; cache iff write-policy != immediate
   372  	err = hlom.Load(!hlom.WritePolicy().IsImmediate() /*cache it*/, true /*locked*/)
   373  	return
   374  }
   375  
   376  func (jg *joggerCtx) visitCT(ct *core.CT, buf []byte) (err error) {
   377  	debug.Assert(ct.ContentType() == fs.ECSliceType)
   378  	if !ct.Bck().Props.EC.Enabled {
   379  		// Since `%ec` directory is inside a bucket, it is safe to skip
   380  		// the entire `%ec` directory when EC is disabled for the bucket.
   381  		return filepath.SkipDir
   382  	}
   383  	jg._mvSlice(ct, buf)
   384  	return nil
   385  }