github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/xact/xs/tcb.go (about)

     1  // Package xs is a collection of eXtended actions (xactions), including multi-object
     2  // operations, list-objects, (cluster) rebalance and (target) resilver, ETL, and more.
     3  /*
     4   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     5   */
     6  package xs
     7  
     8  import (
     9  	"fmt"
    10  	"io"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/NVIDIA/aistore/api/apc"
    15  	"github.com/NVIDIA/aistore/cmn"
    16  	"github.com/NVIDIA/aistore/cmn/atomic"
    17  	"github.com/NVIDIA/aistore/cmn/cos"
    18  	"github.com/NVIDIA/aistore/cmn/debug"
    19  	"github.com/NVIDIA/aistore/cmn/mono"
    20  	"github.com/NVIDIA/aistore/cmn/nlog"
    21  	"github.com/NVIDIA/aistore/core"
    22  	"github.com/NVIDIA/aistore/core/meta"
    23  	"github.com/NVIDIA/aistore/fs"
    24  	"github.com/NVIDIA/aistore/fs/mpather"
    25  	"github.com/NVIDIA/aistore/memsys"
    26  	"github.com/NVIDIA/aistore/transport"
    27  	"github.com/NVIDIA/aistore/transport/bundle"
    28  	"github.com/NVIDIA/aistore/xact"
    29  	"github.com/NVIDIA/aistore/xact/xreg"
    30  )
    31  
type (
	// tcbFactory is the xreg.Renewable for XactTCB: New() clones it for a given
	// request, Start() creates the xaction (and, when needed, its data mover).
	tcbFactory struct {
		xreg.RenewBase
		xctn  *XactTCB      // created by Start(), returned by Get()
		kind  string        // xaction kind; apc.ActETLBck selects the transforming flavor
		phase string        // (see "transition")
		args  *xreg.TCBArgs // args.Custom, as passed to New()
		owt   cmn.OWT       // assigned by Start(): OwtTransform for ETL, OwtCopy otherwise
	}
	// XactTCB copies one bucket into another, with or without transformation.
	XactTCB struct {
		p      *tcbFactory       // the factory that created this xaction (source of args, kind, owt)
		dm     *bundle.DataMover // nil when Start() finds at most one active target
		rxlast atomic.Int64      // finishing: mono time of the last successfully received object (see _recv, qcb)
		xact.BckJog
		prune    prune          // used only when Msg.Sync is set (see newTCB, Run)
		nam, str string         // cached results of Name() and String()
		wg       sync.WaitGroup // starting up: Add(1) in Start(), Done() in Run(), see WaitRunning()
		refc     atomic.Int32   // finishing: senders that have not yet sent OpcTxnDone (see Start, recv)
	}
)
    52  
// OpcTxnDone is the transport opcode that Run() broadcasts once local jogging
// is done; on the receive side each such message decrements refc (see recv).
const OpcTxnDone = 27182

// etlBucketParallelCnt is the mpather.JgroupOpts.Parallel value that newTCB
// uses for apc.ActETLBck.
const etlBucketParallelCnt = 2

// interface guard
var (
	_ core.Xact      = (*XactTCB)(nil)
	_ xreg.Renewable = (*tcbFactory)(nil)
)
    62  
    63  ////////////////
    64  // tcbFactory //
    65  ////////////////
    66  
    67  func (p *tcbFactory) New(args xreg.Args, bck *meta.Bck) xreg.Renewable {
    68  	custom := args.Custom.(*xreg.TCBArgs)
    69  	return &tcbFactory{RenewBase: xreg.RenewBase{Args: args, Bck: bck}, kind: p.kind, phase: custom.Phase, args: custom}
    70  }
    71  
    72  func (p *tcbFactory) Start() error {
    73  	var (
    74  		config    = cmn.GCO.Get()
    75  		slab, err = core.T.PageMM().GetSlab(memsys.MaxPageSlabSize) // TODO: estimate
    76  	)
    77  	debug.AssertNoErr(err)
    78  
    79  	p.owt = cmn.OwtCopy
    80  	if p.kind == apc.ActETLBck {
    81  		p.owt = cmn.OwtTransform
    82  	}
    83  
    84  	smap := core.T.Sowner().Get()
    85  	p.xctn = newTCB(p, slab, config, smap)
    86  
    87  	// refcount OpcTxnDone; this target must ve active (ref: ignoreMaintenance)
    88  	if err := core.InMaintOrDecomm(smap, core.T.Snode(), p.xctn); err != nil {
    89  		return err
    90  	}
    91  	nat := smap.CountActiveTs()
    92  	p.xctn.refc.Store(int32(nat - 1))
    93  	p.xctn.wg.Add(1)
    94  
    95  	var sizePDU int32
    96  	if p.kind == apc.ActETLBck {
    97  		sizePDU = memsys.DefaultBufSize
    98  	}
    99  	if nat <= 1 {
   100  		return nil
   101  	}
   102  	return p.newDM(config, p.UUID(), sizePDU)
   103  }
   104  
   105  func (p *tcbFactory) newDM(config *cmn.Config, uuid string, sizePDU int32) error {
   106  	const trname = "tcb"
   107  	dmExtra := bundle.Extra{
   108  		RecvAck:     nil, // no ACKs
   109  		Config:      config,
   110  		Compression: config.TCB.Compression,
   111  		Multiplier:  config.TCB.SbundleMult,
   112  		SizePDU:     sizePDU,
   113  	}
   114  	// in re cmn.OwtPut: see comment inside _recv()
   115  	dm, err := bundle.NewDataMover(trname+"-"+uuid, p.xctn.recv, p.owt, dmExtra)
   116  	if err != nil {
   117  		return err
   118  	}
   119  	if err := dm.RegRecv(); err != nil {
   120  		return err
   121  	}
   122  	dm.SetXact(p.xctn)
   123  	p.xctn.dm = dm
   124  	return nil
   125  }
   126  
   127  func (p *tcbFactory) Kind() string   { return p.kind }
   128  func (p *tcbFactory) Get() core.Xact { return p.xctn }
   129  
   130  func (p *tcbFactory) WhenPrevIsRunning(prevEntry xreg.Renewable) (wpr xreg.WPR, err error) {
   131  	prev := prevEntry.(*tcbFactory)
   132  	if p.UUID() != prev.UUID() {
   133  		err = cmn.NewErrXactUsePrev(prevEntry.Get().String())
   134  		return
   135  	}
   136  	bckEq := prev.args.BckFrom.Equal(p.args.BckFrom, true /*same BID*/, true /*same backend*/)
   137  	debug.Assert(bckEq)
   138  	debug.Assert(prev.phase == apc.ActBegin && p.phase == apc.ActCommit)
   139  	prev.args.Phase = apc.ActCommit // transition
   140  	wpr = xreg.WprUse
   141  	return
   142  }
   143  
   144  /////////////
   145  // XactTCB //
   146  /////////////
   147  
   148  // copies one bucket _into_ another with or without transformation.
   149  // args.DP.Reader() is the reader to receive transformed bytes; when nil we do a plain bucket copy.
   150  
   151  // limited pre-run abort
   152  func (r *XactTCB) TxnAbort(err error) {
   153  	err = cmn.NewErrAborted(r.Name(), "tcb: txn-abort", err)
   154  	r.dm.Close(err)
   155  	r.dm.UnregRecv()
   156  	r.AddErr(err)
   157  	r.Base.Finish()
   158  }
   159  
   160  func newTCB(p *tcbFactory, slab *memsys.Slab, config *cmn.Config, smap *meta.Smap) (r *XactTCB) {
   161  	r = &XactTCB{p: p}
   162  
   163  	s1, s2 := r._str(), r.p.args.BckFrom.String()
   164  	r.nam = r.Base.Name() + " <= " + s2 + s1
   165  	r.str = r.Base.String() + " <= " + s2 + s1
   166  
   167  	var parallel int
   168  	if p.kind == apc.ActETLBck {
   169  		parallel = etlBucketParallelCnt // TODO: optimize with respect to disk bw and transforming computation
   170  	}
   171  	mpopts := &mpather.JgroupOpts{
   172  		CTs:      []string{fs.ObjectType},
   173  		VisitObj: r.do,
   174  		Prefix:   p.args.Msg.Prefix,
   175  		Slab:     slab,
   176  		Parallel: parallel,
   177  		DoLoad:   mpather.Load,
   178  		Throttle: true, // always trottling
   179  	}
   180  	mpopts.Bck.Copy(p.args.BckFrom.Bucket())
   181  	r.BckJog.Init(p.UUID(), p.kind, p.args.BckTo, mpopts, config)
   182  
   183  	if p.args.Msg.Sync {
   184  		debug.Assert(p.args.Msg.Prepend == "", p.args.Msg.Prepend) // validated (cli, P)
   185  		{
   186  			r.prune.parent = r
   187  			r.prune.smap = smap
   188  			r.prune.bckFrom = p.args.BckFrom
   189  			r.prune.bckTo = p.args.BckTo
   190  			r.prune.prefix = p.args.Msg.Prefix
   191  		}
   192  		r.prune.init(config)
   193  	}
   194  	return
   195  }
   196  
   197  func (r *XactTCB) WaitRunning() { r.wg.Wait() }
   198  
// Run executes the xaction:
//  1. open the data mover, if there is one;
//  2. signal "running" to both the caller (wg) and WaitRunning() (r.wg);
//  3. run the bucket joggers and, with Msg.Sync, the prune pass;
//  4. wait for the joggers; then, if there is a data mover: broadcast
//     OpcTxnDone, quiesce (see qcb), close and unregister;
//  5. wait for the prune pass (Msg.Sync) and finish.
func (r *XactTCB) Run(wg *sync.WaitGroup) {
	if r.dm != nil {
		r.dm.SetXact(r)
		r.dm.Open()
	}
	wg.Done()

	// unblock WaitRunning()
	r.wg.Done()

	r.BckJog.Run()
	if r.p.args.Msg.Sync {
		r.prune.run() // the 2nd jgroup
	}
	nlog.Infoln(r.Name())

	err := r.BckJog.Wait()

	if r.dm != nil {
		// tell the peers that this target is done sending
		o := transport.AllocSend()
		o.Hdr.Opcode = OpcTxnDone
		r.dm.Bcast(o, nil)

		// keep receiving until qcb reports done (or gives up)
		q := r.Quiesce(cmn.Rom.CplaneOperation(), r.qcb)
		if q == core.QuiTimeout {
			r.AddErr(fmt.Errorf("%s: %v", r, cmn.ErrQuiesceTimeout))
		}

		// close (passing the joggers' error, if any)
		r.dm.Close(err)
		r.dm.UnregRecv()
	}
	if r.p.args.Msg.Sync {
		r.prune.wait()
	}
	r.Finish()
}
   235  
   236  func (r *XactTCB) qcb(tot time.Duration) core.QuiRes {
   237  	// TODO -- FIXME =======================
   238  	if cnt := r.ErrCnt(); cnt > 0 {
   239  		// to break quiescence - the waiter will look at r.Err() first anyway
   240  		return core.QuiTimeout
   241  	}
   242  
   243  	since := mono.Since(r.rxlast.Load())
   244  	if r.refc.Load() > 0 {
   245  		if since > cmn.Rom.MaxKeepalive() {
   246  			// idle on the Rx side despite having some (refc > 0) senders
   247  			if tot > r.BckJog.Config.Timeout.SendFile.D() {
   248  				return core.QuiTimeout
   249  			}
   250  		}
   251  		return core.QuiActive
   252  	}
   253  	if since > cmn.Rom.CplaneOperation() {
   254  		return core.QuiDone
   255  	}
   256  	return core.QuiInactiveCB
   257  }
   258  
   259  func (r *XactTCB) do(lom *core.LOM, buf []byte) (err error) {
   260  	var (
   261  		args   = r.p.args // TCBArgs
   262  		toName = args.Msg.ToName(lom.ObjName)
   263  	)
   264  	if cmn.Rom.FastV(5, cos.SmoduleXs) {
   265  		nlog.Infoln(r.Base.Name()+":", lom.Cname(), "=>", args.BckTo.Cname(toName))
   266  	}
   267  	coiParams := core.AllocCOI()
   268  	{
   269  		coiParams.DP = args.DP
   270  		coiParams.Xact = r
   271  		coiParams.Config = r.Config
   272  		coiParams.BckTo = args.BckTo
   273  		coiParams.ObjnameTo = toName
   274  		coiParams.Buf = buf
   275  		coiParams.OWT = r.p.owt
   276  		coiParams.DryRun = args.Msg.DryRun
   277  		coiParams.LatestVer = args.Msg.LatestVer
   278  		coiParams.Sync = args.Msg.Sync
   279  	}
   280  	_, err = core.T.CopyObject(lom, r.dm, coiParams)
   281  	core.FreeCOI(coiParams)
   282  	switch {
   283  	case err == nil:
   284  		if args.Msg.Sync {
   285  			r.prune.filter.Insert(cos.UnsafeB(lom.Uname()))
   286  		}
   287  	case cos.IsNotExist(err, 0):
   288  		// do nothing
   289  	case cos.IsErrOOS(err):
   290  		r.Abort(err)
   291  	default:
   292  		r.AddErr(err, 5, cos.SmoduleXs)
   293  	}
   294  	return
   295  }
   296  
   297  // NOTE: strict(est) error handling: abort on any of the errors below
   298  func (r *XactTCB) recv(hdr *transport.ObjHdr, objReader io.Reader, err error) error {
   299  	if err != nil && !cos.IsEOF(err) {
   300  		nlog.Errorln(err)
   301  		return err
   302  	}
   303  	// ref-count done-senders
   304  	if hdr.Opcode == OpcTxnDone {
   305  		refc := r.refc.Dec()
   306  		debug.Assert(refc >= 0)
   307  		return nil
   308  	}
   309  
   310  	debug.Assert(hdr.Opcode == 0)
   311  	lom := core.AllocLOM(hdr.ObjName)
   312  	err = r._recv(hdr, objReader, lom)
   313  	core.FreeLOM(lom)
   314  	transport.DrainAndFreeReader(objReader)
   315  	return err
   316  }
   317  
   318  func (r *XactTCB) _recv(hdr *transport.ObjHdr, objReader io.Reader, lom *core.LOM) error {
   319  	if err := lom.InitBck(&hdr.Bck); err != nil {
   320  		r.AddErr(err, 0)
   321  		return err
   322  	}
   323  	lom.CopyAttrs(&hdr.ObjAttrs, true /*skip cksum*/)
   324  	params := core.AllocPutParams()
   325  	{
   326  		params.WorkTag = fs.WorkfilePut
   327  		params.Reader = io.NopCloser(objReader)
   328  		params.Cksum = hdr.ObjAttrs.Cksum
   329  		params.Xact = r
   330  		params.Size = hdr.ObjAttrs.Size
   331  		params.OWT = r.p.owt
   332  	}
   333  	if lom.AtimeUnix() == 0 {
   334  		// TODO: sender must be setting it, remove this `if` when fixed
   335  		lom.SetAtimeUnix(time.Now().UnixNano())
   336  	}
   337  	params.Atime = lom.Atime()
   338  
   339  	erp := core.T.PutObject(lom, params)
   340  	core.FreePutParams(params)
   341  	if erp != nil {
   342  		r.AddErr(erp, 0)
   343  		return erp // NOTE: non-nil signals transport to terminate
   344  	}
   345  	r.rxlast.Store(mono.NanoTime())
   346  	return nil
   347  }
   348  
   349  func (r *XactTCB) Args() *xreg.TCBArgs { return r.p.args }
   350  
   351  func (r *XactTCB) _str() (s string) {
   352  	msg := &r.p.args.Msg.CopyBckMsg
   353  	if msg.Prefix != "" {
   354  		s = ", prefix " + r.p.args.Msg.Prefix
   355  	}
   356  	if msg.Prepend != "" {
   357  		s = ", prepend " + r.p.args.Msg.Prepend
   358  	}
   359  	if msg.LatestVer {
   360  		s = ", latest-ver"
   361  	}
   362  	if msg.Sync {
   363  		s = ", synchronize"
   364  	}
   365  	return s
   366  }
   367  
   368  func (r *XactTCB) String() string { return r.str }
   369  func (r *XactTCB) Name() string   { return r.nam }
   370  
   371  func (r *XactTCB) FromTo() (*meta.Bck, *meta.Bck) {
   372  	return r.p.args.BckFrom, r.p.args.BckTo
   373  }
   374  
   375  func (r *XactTCB) Snap() (snap *core.Snap) {
   376  	snap = &core.Snap{}
   377  	r.ToSnap(snap)
   378  
   379  	snap.IdleX = r.IsIdle()
   380  	f, t := r.FromTo()
   381  	snap.SrcBck, snap.DstBck = f.Clone(), t.Clone()
   382  	return
   383  }