github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/xact/xs/streaming.go (about)

     1  // Package xs is a collection of eXtended actions (xactions), including multi-object
     2  // operations, list-objects, (cluster) rebalance and (target) resilver, ETL, and more.
     3  /*
     4   * Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
     5   */
     6  package xs
     7  
     8  import (
     9  	"strconv"
    10  	"time"
    11  
    12  	"github.com/NVIDIA/aistore/cmn"
    13  	"github.com/NVIDIA/aistore/cmn/atomic"
    14  	"github.com/NVIDIA/aistore/cmn/cos"
    15  	"github.com/NVIDIA/aistore/cmn/debug"
    16  	"github.com/NVIDIA/aistore/cmn/nlog"
    17  	"github.com/NVIDIA/aistore/core"
    18  	"github.com/NVIDIA/aistore/core/meta"
    19  	"github.com/NVIDIA/aistore/hk"
    20  	"github.com/NVIDIA/aistore/transport"
    21  	"github.com/NVIDIA/aistore/transport/bundle"
    22  	"github.com/NVIDIA/aistore/xact"
    23  	"github.com/NVIDIA/aistore/xact/xreg"
    24  )
    25  
    26  //
    27  // multi-object on-demand (transactional) xactions - common logic
    28  //
    29  
    30  const (
    31  	opcodeDone = iota + 27182
    32  	opcodeAbrt
    33  )
    34  
    35  const (
    36  	waitRegRecv   = 4 * time.Second
    37  	waitUnregRecv = 2 * waitRegRecv
    38  	waitUnregMax  = 2 * waitUnregRecv
    39  
    40  	maxNumInParallel = 256
    41  )
    42  
    43  type (
    44  	streamingF struct {
    45  		xreg.RenewBase
    46  		xctn core.Xact
    47  		dm   *bundle.DataMover
    48  		kind string
    49  	}
    50  	streamingX struct {
    51  		p      *streamingF
    52  		config *cmn.Config
    53  		xact.DemandBase
    54  		wiCnt atomic.Int32
    55  		maxWt time.Duration
    56  	}
    57  )
    58  
    59  //
    60  // (common factory part)
    61  //
    62  
    63  func (p *streamingF) Kind() string   { return p.kind }
    64  func (p *streamingF) Get() core.Xact { return p.xctn }
    65  
    66  func (p *streamingF) WhenPrevIsRunning(xprev xreg.Renewable) (xreg.WPR, error) {
    67  	debug.Assertf(false, "%s vs %s", p.Str(p.Kind()), xprev) // xreg.usePrev() must've returned true
    68  	return xreg.WprUse, nil
    69  }
    70  
    71  // NOTE: transport endpoint (aka "trname") identifies the flow and MUST be identical
    72  // across all participating targets. The mechanism involves generating so-called "best-effort UUID"
    73  // independently on (by) all targets and using the latter as both xaction ID and receive endpoint (trname)
    74  // for target=>target streams.
    75  
    76  func (p *streamingF) genBEID(fromBck, toBck *meta.Bck) (string, error) {
    77  	var (
    78  		div = uint64(xact.IdleDefault)
    79  		bmd = core.T.Bowner().Get()
    80  		tag = p.kind + "|" + fromBck.MakeUname("") + "|" + toBck.MakeUname("") + "|" + strconv.FormatInt(bmd.Version, 10)
    81  	)
    82  	beid, prev, err := xreg.GenBEID(div, tag)
    83  	if beid != "" {
    84  		debug.Assert(err == nil && prev == nil)
    85  		return beid, nil
    86  	}
    87  	if prev != nil {
    88  		err = cmn.NewErrBusy("node", core.T.String(), "running "+prev.Name())
    89  	}
    90  	return "", err
    91  }
    92  
    93  func (p *streamingF) newDM(trname string, recv transport.RecvObj, config *cmn.Config, owt cmn.OWT, sizePDU int32) (err error) {
    94  	smap := core.T.Sowner().Get()
    95  	if err := core.InMaintOrDecomm(smap, core.T.Snode(), p.xctn); err != nil {
    96  		return err
    97  	}
    98  	if smap.CountActiveTs() <= 1 {
    99  		return nil
   100  	}
   101  
   102  	// consider adding config.X.Compression, config.X.SbundleMult (currently, always 1), etc.
   103  	dmxtra := bundle.Extra{Config: config, Multiplier: 1, SizePDU: sizePDU}
   104  	p.dm, err = bundle.NewDataMover(trname, recv, owt, dmxtra)
   105  	if err != nil {
   106  		return err
   107  	}
   108  	if err = p.dm.RegRecv(); err == nil {
   109  		return nil
   110  	}
   111  
   112  	nlog.Errorln(err)
   113  	sleep := cos.ProbingFrequency(waitRegRecv)
   114  	for total := time.Duration(0); err != nil && transport.IsErrDuplicateTrname(err) && total < waitRegRecv; total += sleep {
   115  		time.Sleep(sleep)
   116  		err = p.dm.RegRecv()
   117  	}
   118  	return err
   119  }
   120  
   121  func (r *streamingX) String() (s string) {
   122  	s = r.DemandBase.String()
   123  	if r.p.dm == nil {
   124  		return
   125  	}
   126  	return s + "-" + r.p.dm.String()
   127  }
   128  
   129  // limited pre-run abort
   130  func (r *streamingX) TxnAbort(err error) {
   131  	err = cmn.NewErrAborted(r.Name(), "txn-abort", err)
   132  	r.p.dm.Close(err)
   133  	r.p.dm.UnregRecv()
   134  	r.AddErr(err)
   135  	r.Base.Finish()
   136  }
   137  
   138  func (r *streamingX) sendTerm(uuid string, tsi *meta.Snode, err error) {
   139  	if r.p.dm == nil { // single target
   140  		return
   141  	}
   142  	o := transport.AllocSend()
   143  	o.Hdr.SID = core.T.SID()
   144  	o.Hdr.Opaque = []byte(uuid)
   145  	if err == nil {
   146  		o.Hdr.Opcode = opcodeDone
   147  	} else {
   148  		o.Hdr.Opcode = opcodeAbrt
   149  		o.Hdr.ObjName = err.Error()
   150  	}
   151  	if tsi != nil {
   152  		r.p.dm.Send(o, nil, tsi) // to the responsible target
   153  	} else {
   154  		r.p.dm.Bcast(o, nil) // to all
   155  	}
   156  }
   157  
   158  func (r *streamingX) fin(unreg bool) {
   159  	if r.DemandBase.Finished() {
   160  		// must be aborted
   161  		r.p.dm.Close(r.Err())
   162  		r.p.dm.UnregRecv()
   163  		return
   164  	}
   165  
   166  	r.DemandBase.Stop()
   167  	r.p.dm.Close(r.Err())
   168  	r.Finish()
   169  	if unreg && r.p.dm != nil {
   170  		r.maxWt = 0
   171  		hk.Reg(r.ID()+hk.NameSuffix, r.wurr, waitUnregRecv) // compare w/ lso
   172  	}
   173  }
   174  
   175  func (r *streamingX) wurr() time.Duration {
   176  	if cnt := r.wiCnt.Load(); cnt > 0 {
   177  		r.maxWt += waitUnregRecv
   178  		if r.maxWt < waitUnregMax {
   179  			return waitUnregRecv
   180  		}
   181  		nlog.Errorf("%s: unreg timeout %v, cnt %d", r, r.maxWt, cnt)
   182  	}
   183  	r.p.dm.UnregRecv()
   184  	return hk.UnregInterval
   185  }