github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/xact/xs/tcobjs.go (about)

     1  // Package xs is a collection of eXtended actions (xactions), including multi-object
     2  // operations, list-objects, (cluster) rebalance and (target) resilver, ETL, and more.
     3  /*
     4   * Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
     5   */
     6  package xs
     7  
     8  import (
     9  	"fmt"
    10  	"io"
    11  	"runtime"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/NVIDIA/aistore/api/apc"
    16  	"github.com/NVIDIA/aistore/cmn"
    17  	"github.com/NVIDIA/aistore/cmn/atomic"
    18  	"github.com/NVIDIA/aistore/cmn/cos"
    19  	"github.com/NVIDIA/aistore/cmn/debug"
    20  	"github.com/NVIDIA/aistore/cmn/nlog"
    21  	"github.com/NVIDIA/aistore/core"
    22  	"github.com/NVIDIA/aistore/core/meta"
    23  	"github.com/NVIDIA/aistore/fs"
    24  	"github.com/NVIDIA/aistore/memsys"
    25  	"github.com/NVIDIA/aistore/transport"
    26  	"github.com/NVIDIA/aistore/xact"
    27  	"github.com/NVIDIA/aistore/xact/xreg"
    28  )
    29  
// PrefixTcoID is prepended to the generated ID to form the UUID of a
// multi-object copy/transform xaction (see tcoFactory.Start).
const PrefixTcoID = "tco-"
    31  
type (
	// tcoFactory is the xreg.Renewable factory that creates and starts XactTCObjs.
	tcoFactory struct {
		// custom arguments taken from xreg.Args.Custom (see New)
		args *xreg.TCObjsArgs
		streamingF
	}
	// XactTCObjs is the multi-object copy/transform xaction: it consumes
	// TCObjsMsg work messages (see Do, Run) and receives objects and
	// "done" notifications from its peers (see recv).
	XactTCObjs struct {
		// work items registered by Begin
		pending struct {
			// keyed by transaction UUID (msg.TxnUUID)
			m map[string]*tcowi
			// guards m
			mtx sync.RWMutex
		}
		// source/destination buckets and the rest of the custom arguments
		args *xreg.TCObjsArgs
		// work queue: written by Do, read by Run
		workCh chan *cmn.TCObjsMsg
		// number of times Do has observed workCh at full capacity
		chanFull atomic.Int64
		streamingX
		// cmn.OwtTransform for apc.ActETLObjects, cmn.OwtCopy otherwise (see tcoFactory.Start)
		owt cmn.OWT
	}
	// tcowi is a single work item: one TCObjsMsg handled on behalf of its parent xaction.
	tcowi struct {
		// parent xaction
		r *XactTCObjs
		// the message that defines this work item
		msg *cmn.TCObjsMsg
		// finishing: set by Run to the number of active targets minus one,
		// decremented by _recv for each received opcodeDone
		refc atomic.Int32
	}
)
    55  
// interface guard: compile-time checks that the types declared in this file
// implement the interfaces they are used as
var (
	_ core.Xact      = (*XactTCObjs)(nil)
	_ xreg.Renewable = (*tcoFactory)(nil)
	_ lrwi           = (*tcowi)(nil)
)
    62  
    63  ////////////////
    64  // tcoFactory //
    65  ////////////////
    66  
    67  func (p *tcoFactory) New(args xreg.Args, bckFrom *meta.Bck) xreg.Renewable {
    68  	np := &tcoFactory{streamingF: streamingF{RenewBase: xreg.RenewBase{Args: args, Bck: bckFrom}, kind: p.kind}}
    69  	np.args = args.Custom.(*xreg.TCObjsArgs)
    70  	return np
    71  }
    72  
    73  func (p *tcoFactory) Start() error {
    74  	//
    75  	// target-local generation of a global UUID
    76  	//
    77  	uuid, err := p.genBEID(p.args.BckFrom, p.args.BckTo)
    78  	if err != nil {
    79  		return err
    80  	}
    81  	p.Args.UUID = PrefixTcoID + uuid
    82  
    83  	// new x-tco
    84  	workCh := make(chan *cmn.TCObjsMsg, maxNumInParallel)
    85  	r := &XactTCObjs{streamingX: streamingX{p: &p.streamingF, config: cmn.GCO.Get()}, args: p.args, workCh: workCh}
    86  	r.pending.m = make(map[string]*tcowi, maxNumInParallel)
    87  	r.owt = cmn.OwtCopy
    88  	if p.kind == apc.ActETLObjects {
    89  		r.owt = cmn.OwtTransform
    90  	}
    91  	p.xctn = r
    92  	r.DemandBase.Init(p.UUID(), p.Kind(), p.Bck, xact.IdleDefault)
    93  
    94  	var sizePDU int32
    95  	if p.kind == apc.ActETLObjects {
    96  		// unlike apc.ActCopyObjects (where we know the size)
    97  		// apc.ActETLObjects (transform) generates arbitrary sizes where we use PDU-based transport
    98  		sizePDU = memsys.DefaultBufSize
    99  	}
   100  
   101  	if err := p.newDM(p.Args.UUID /*trname*/, r.recv, r.config, r.owt, sizePDU); err != nil {
   102  		return err
   103  	}
   104  
   105  	if r.p.dm != nil {
   106  		p.dm.SetXact(r)
   107  		p.dm.Open()
   108  	}
   109  	xact.GoRunW(r)
   110  	return nil
   111  }
   112  
   113  ////////////////
   114  // XactTCObjs //
   115  ////////////////
   116  
   117  func (r *XactTCObjs) Name() string {
   118  	return fmt.Sprintf("%s => %s", r.streamingX.Name(), r.args.BckTo)
   119  }
   120  
   121  func (r *XactTCObjs) String() string {
   122  	return r.streamingX.String() + " => " + r.args.BckTo.String()
   123  }
   124  
   125  func (r *XactTCObjs) FromTo() (*meta.Bck, *meta.Bck) { return r.args.BckFrom, r.args.BckTo }
   126  
   127  func (r *XactTCObjs) Snap() (snap *core.Snap) {
   128  	snap = &core.Snap{}
   129  	r.ToSnap(snap)
   130  
   131  	snap.IdleX = r.IsIdle()
   132  	f, t := r.FromTo()
   133  	snap.SrcBck, snap.DstBck = f.Clone(), t.Clone()
   134  	return
   135  }
   136  
   137  func (r *XactTCObjs) Begin(msg *cmn.TCObjsMsg) {
   138  	wi := &tcowi{r: r, msg: msg}
   139  	r.pending.mtx.Lock()
   140  	r.pending.m[msg.TxnUUID] = wi
   141  	r.wiCnt.Inc()
   142  	r.pending.mtx.Unlock()
   143  }
   144  
// Run is the xaction's main loop. It logs the xaction name, signals the caller
// via wg.Done(), and then handles one event per iteration:
//   - a message from workCh (queued by Do): looks up the work item registered
//     by Begin under msg.TxnUUID and iterates the message's list/range; when
//     msg.Sync is set and the selection is not a plain list, r.prune runs
//     concurrently with the iteration;
//   - idle-timer expiration;
//   - abort.
//
// A missing work item with recorded errors, this target being in maintenance or
// decommissioned, an iteration error, an abort, and an idle timeout all jump to
// `fin`, which finalizes the xaction.
func (r *XactTCObjs) Run(wg *sync.WaitGroup) {
	var err error
	nlog.Infoln(r.Name())
	wg.Done()
	for {
		select {
		case msg := <-r.workCh:
			var (
				smap = core.T.Sowner().Get()
				lrit = &lriterator{}
			)
			debug.Assert(cos.IsValidUUID(msg.TxnUUID), msg.TxnUUID) // (ref050724: in re: ais/plstcx)
			// find the work item that Begin stored for this transaction
			r.pending.mtx.Lock()
			wi, ok := r.pending.m[msg.TxnUUID]
			r.pending.mtx.Unlock()
			if !ok {
				// the pending map is cleared at `fin` when errors were recorded
				if r.ErrCnt() > 0 {
					goto fin
				}
				nlog.Errorf("%s: expecting errors in %s, missing txn %q", core.T.String(), r.String(), msg.TxnUUID) // (unlikely)
				// NOTE(review): Do() called IncPending() for this message, and this path
				// continues without the matching DecPending() - confirm that is intended
				continue
			}

			// this target must be active (ref: ignoreMaintenance)
			if err = core.InMaintOrDecomm(smap, core.T.Snode(), r); err != nil {
				nlog.Errorln(err)
				goto fin
			}
			// number of active targets minus one; _recv decrements it on every opcodeDone
			// NOTE(review): an opcodeDone handled by _recv before this Store would be
			// overwritten - confirm ordering is guaranteed elsewhere
			nat := smap.CountActiveTs()
			wi.refc.Store(int32(nat - 1))

			// run
			// (this local `wg` shadows the function parameter, which is no longer needed)
			var wg *sync.WaitGroup
			if err = lrit.init(r, &msg.ListRange, r.Bck()); err == nil {
				if msg.Sync && lrit.lrp != lrpList {
					wg = &sync.WaitGroup{}
					wg.Add(1)
					// prune gets its own clone of the parsed template
					go func(pt *cos.ParsedTemplate) {
						r.prune(lrit, smap, pt)
						wg.Done()
					}(lrit.pt.Clone())
				}
				err = lrit.run(wi, smap)
			}
			// wait for the pruning goroutine (if started), then for the iterator itself
			if wg != nil {
				wg.Wait()
			}
			lrit.wait()

			if r.IsAborted() || err != nil {
				goto fin
			}
			// presumably tells the peers that this target is done with the txn
			// (counterpart of the opcodeDone handling in _recv) - TODO confirm
			r.sendTerm(wi.msg.TxnUUID, nil, nil)
			// balances the IncPending() in Do
			r.DecPending()
		case <-r.IdleTimer():
			goto fin
		case <-r.ChanAbort():
			goto fin
		}
	}
fin:
	r.fin(true /*unreg Rx*/)
	if r.ErrCnt() > 0 {
		// (see "expecting errors" and cleanup)
		r.pending.mtx.Lock()
		clear(r.pending.m)
		r.pending.mtx.Unlock()
	}
}
   214  
   215  // more work
   216  func (r *XactTCObjs) Do(msg *cmn.TCObjsMsg) {
   217  	r.IncPending()
   218  	r.workCh <- msg
   219  
   220  	if l, c := len(r.workCh), cap(r.workCh); l > c/2 {
   221  		runtime.Gosched() // poor man's throttle
   222  		if l == c {
   223  			cnt := r.chanFull.Inc()
   224  			if (cnt >= 10 && cnt <= 20) || (cnt > 0 && cmn.Rom.FastV(5, cos.SmoduleXs)) {
   225  				nlog.Errorln("work channel full", r.Name())
   226  			}
   227  		}
   228  	}
   229  }
   230  
   231  //
   232  // Rx
   233  //
   234  
   235  // NOTE: strict(est) error handling: abort on any of the errors below
   236  func (r *XactTCObjs) recv(hdr *transport.ObjHdr, objReader io.Reader, err error) error {
   237  	if err != nil && !cos.IsEOF(err) {
   238  		goto ex
   239  	}
   240  
   241  	r.IncPending()
   242  	err = r._recv(hdr, objReader)
   243  	r.DecPending()
   244  	transport.DrainAndFreeReader(objReader)
   245  ex:
   246  	if err != nil && cmn.Rom.FastV(4, cos.SmoduleXs) {
   247  		nlog.Errorln(err)
   248  	}
   249  	return err
   250  }
   251  
   252  func (r *XactTCObjs) _recv(hdr *transport.ObjHdr, objReader io.Reader) error {
   253  	if hdr.Opcode == opcodeDone {
   254  		r.pending.mtx.Lock()
   255  		wi, ok := r.pending.m[cos.UnsafeS(hdr.Opaque)] // txnUUID
   256  		if !ok {
   257  			r.pending.mtx.Unlock()
   258  			_, err := r.JoinErr()
   259  			return err
   260  		}
   261  		refc := wi.refc.Dec()
   262  		if refc == 0 {
   263  			r.wiCnt.Dec()
   264  		}
   265  		r.pending.mtx.Unlock()
   266  		return nil
   267  	}
   268  
   269  	debug.Assert(hdr.Opcode == 0)
   270  	lom := core.AllocLOM(hdr.ObjName)
   271  	err := r._put(hdr, objReader, lom)
   272  	core.FreeLOM(lom)
   273  	return err
   274  }
   275  
   276  func (r *XactTCObjs) _put(hdr *transport.ObjHdr, objReader io.Reader, lom *core.LOM) (err error) {
   277  	if err = lom.InitBck(&hdr.Bck); err != nil {
   278  		return
   279  	}
   280  	lom.CopyAttrs(&hdr.ObjAttrs, true /*skip cksum*/)
   281  	params := core.AllocPutParams()
   282  	{
   283  		params.WorkTag = fs.WorkfilePut
   284  		params.Reader = io.NopCloser(objReader)
   285  		params.Cksum = hdr.ObjAttrs.Cksum
   286  		params.Xact = r
   287  		params.Size = hdr.ObjAttrs.Size
   288  		params.OWT = r.owt
   289  	}
   290  	if lom.AtimeUnix() == 0 {
   291  		// TODO: sender must be setting it, remove this `if` when fixed
   292  		lom.SetAtimeUnix(time.Now().UnixNano())
   293  	}
   294  	params.Atime = lom.Atime()
   295  	err = core.T.PutObject(lom, params)
   296  	core.FreePutParams(params)
   297  
   298  	if err != nil {
   299  		r.AddErr(err, 5, cos.SmoduleXs)
   300  	} else if cmn.Rom.FastV(5, cos.SmoduleXs) {
   301  		nlog.Infof("%s: tco-Rx %s, size=%d", r.Base.Name(), lom.Cname(), hdr.ObjAttrs.Size)
   302  	}
   303  	return
   304  }
   305  
   306  ///////////
   307  // tcowi //
   308  ///////////
   309  
   310  func (wi *tcowi) do(lom *core.LOM, lrit *lriterator) {
   311  	var (
   312  		objNameTo = wi.msg.ToName(lom.ObjName)
   313  		buf, slab = core.T.PageMM().Alloc()
   314  	)
   315  
   316  	// under ETL, the returned sizes of transformed objects are unknown (`cos.ContentLengthUnknown`)
   317  	// until after the transformation; here we are disregarding the size anyway as the stats
   318  	// are done elsewhere
   319  
   320  	coiParams := core.AllocCOI()
   321  	{
   322  		coiParams.DP = wi.r.args.DP
   323  		coiParams.Xact = wi.r
   324  		coiParams.Config = wi.r.config
   325  		coiParams.BckTo = wi.r.args.BckTo
   326  		coiParams.ObjnameTo = objNameTo
   327  		coiParams.Buf = buf
   328  		coiParams.OWT = wi.r.owt
   329  		coiParams.DryRun = wi.msg.DryRun
   330  		coiParams.LatestVer = wi.msg.LatestVer
   331  		coiParams.Sync = wi.msg.Sync
   332  	}
   333  	_, err := core.T.CopyObject(lom, wi.r.p.dm, coiParams)
   334  	core.FreeCOI(coiParams)
   335  	slab.Free(buf)
   336  
   337  	if err != nil {
   338  		if !cos.IsNotExist(err, 0) || lrit.lrp == lrpList {
   339  			wi.r.AddErr(err, 5, cos.SmoduleXs)
   340  		}
   341  	} else if cmn.Rom.FastV(5, cos.SmoduleXs) {
   342  		nlog.Infoln(wi.r.Name()+":", lom.Cname(), "=>", wi.r.args.BckTo.Cname(objNameTo))
   343  	}
   344  }
   345  
   346  //
   347  // remove objects not present at the source (when synchronizing bckFrom => bckTo)
   348  // TODO: probabilistic filtering
   349  //
   350  
// syncwi adapts prune to the lrwi interface so that prune.do can be driven
// by an lriterator (see XactTCObjs.prune).
type syncwi struct {
	rp *prune
}

// interface guard
var _ lrwi = (*syncwi)(nil)
   357  
   358  func (r *XactTCObjs) prune(lrit *lriterator, smap *meta.Smap, pt *cos.ParsedTemplate) {
   359  	rp := prune{parent: r, smap: smap}
   360  	rp.bckFrom, rp.bckTo = r.FromTo()
   361  
   362  	// tcb use case
   363  	if lrit.lrp == lrpPrefix {
   364  		rp.prefix = lrit.prefix
   365  		rp.init(r.config)
   366  		rp.run()
   367  		rp.wait()
   368  		return
   369  	}
   370  
   371  	// same range iterator but different bucket
   372  	var syncit lriterator
   373  	debug.Assert(lrit.lrp == lrpRange)
   374  
   375  	err := syncit.init(lrit.parent, lrit.msg, rp.bckTo)
   376  	debug.AssertNoErr(err)
   377  	syncit.pt = pt
   378  	syncwi := &syncwi{&rp} // reusing only prune.do (and not init/run/wait)
   379  	syncit.run(syncwi, smap)
   380  	syncit.wait()
   381  }
   382  
   383  func (syncwi *syncwi) do(lom *core.LOM, _ *lriterator) {
   384  	syncwi.rp.do(lom, nil)
   385  }