github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/mirror/put_copies.go

// Package mirror provides local mirroring and replica management
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */
package mirror

import (
	"fmt"
	"sync"
	"time"

	"github.com/NVIDIA/aistore/api/apc"
	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/atomic"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/mono"
	"github.com/NVIDIA/aistore/cmn/nlog"
	"github.com/NVIDIA/aistore/core"
	"github.com/NVIDIA/aistore/core/meta"
	"github.com/NVIDIA/aistore/fs"
	"github.com/NVIDIA/aistore/fs/mpather"
	"github.com/NVIDIA/aistore/memsys"
	"github.com/NVIDIA/aistore/xact"
	"github.com/NVIDIA/aistore/xact/xreg"
)

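// putFactory is the xreg (xaction registry) factory that creates and starts XactPut.
//
// XactPut is an on-demand xaction that makes additional local copies of
// objects when the bucket's mirroring is enabled. Objects (LOMs) are
// dispatched to per-mountpath workers; chanFull counts the times a worker
// queue was found full at dispatch time (reported when the xaction stops).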
type (
	putFactory struct {
		xreg.RenewBase
		xctn *XactPut
		lom  *core.LOM
	}
	XactPut struct {
		// implements core.Xact interface
		xact.DemandBase
		// runtime
		workers  *mpather.WorkerGroup
		workCh   chan core.LIF
		chanFull atomic.Int64
		// init
		mirror cmn.MirrorConf
		config *cmn.Config
	}
)

// interface guard
var (
	_ core.Xact      = (*XactPut)(nil)
	_ xreg.Renewable = (*putFactory)(nil)
)

////////////////
// putFactory //
////////////////

func (*putFactory) New(args xreg.Args, bck *meta.Bck) xreg.Renewable {
	p := &putFactory{RenewBase: xreg.RenewBase{Args: args, Bck: bck}, lom: args.Custom.(*core.LOM)}
	return p
}

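// Start validates the bucket's mirror configuration, generates the xaction's
// UUID (target-locally, see below), initializes the demand-base with the
// default idle timeout, creates one worker per mountpath, and launches Run.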
func (p *putFactory) Start() error {
	lom := p.lom
	slab, err := core.T.PageMM().GetSlab(memsys.MaxPageSlabSize) // TODO: estimate
	debug.AssertNoErr(err)

	bck, mirror := lom.Bck(), lom.MirrorConf()
	if !mirror.Enabled {
		return fmt.Errorf("%s: mirroring disabled, nothing to do", bck)
	}
	if err = fs.ValidateNCopies(core.T.String(), int(mirror.Copies)); err != nil {
		nlog.Errorln(err)
		return err
	}
	r := &XactPut{mirror: *mirror, workCh: make(chan core.LIF, mirror.Burst)}

	//
	// target-local generation of a global UUID
	//
	div := uint64(xact.IdleDefault)
	beid, _, _ := xreg.GenBEID(div, p.Kind()+"|"+bck.MakeUname(""))
	if beid == "" {
		// is Ok (compare with x-archive, x-tco)
		beid = cos.GenUUID()
	}
	r.DemandBase.Init(beid, p.Kind(), bck, xact.IdleDefault)

	// joggers (one worker per mountpath)
	r.workers = mpather.NewWorkerGroup(&mpather.WorkerGroupOpts{
		Callback:  r.do,
		Slab:      slab,
		QueueSize: mirror.Burst,
	})
	p.xctn = r

	// run
	go r.Run(nil)
	return nil
}

func (*putFactory) Kind() string     { return apc.ActPutCopies }
func (p *putFactory) Get() core.Xact { return p.xctn }

func (p *putFactory) WhenPrevIsRunning(xprev xreg.Renewable) (xreg.WPR, error) {
	debug.Assertf(false, "%s vs %s", p.Str(p.Kind()), xprev) // xreg.usePrev() must've returned true
	return xreg.WprUse, nil
}

/////////////
// XactPut //
/////////////

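// do is the worker callback: under the object's write lock it creates the
// missing copies (up to the configured count), updates object stats or the
// error count, decrements the pending refcount, and frees the LOM.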
// (one worker per mountpath)
func (r *XactPut) do(lom *core.LOM, buf []byte) {
	copies := int(lom.Bprops().Mirror.Copies)

	lom.Lock(true)
	size, err := addCopies(lom, copies, buf)
	lom.Unlock(true)

	if err != nil {
		r.AddErr(err, 5, cos.SmoduleMirror)
	} else {
		r.ObjsAdd(1, size)
	}
	r.DecPending() // (see IncPending below)
	core.FreeLOM(lom)
}

// control logic: stop and idle timer
// (LOMs get dispatched directly to workers; Run exits when the xaction
// stays idle for xact.IdleDefault or when it gets aborted)
func (r *XactPut) Run(*sync.WaitGroup) {
	var err error
	nlog.Infoln(r.Name())
	r.config = cmn.GCO.Get()
	r.workers.Run()
loop:
	for {
		select {
		case <-r.IdleTimer():
			r.waitPending()
			break loop
		case <-r.ChanAbort():
			break loop
		}
	}

	err = r.stop()
	if err != nil {
		r.AddErr(err)
	}
	r.Finish()
}

// main method
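// Repl posts the object to its mountpath worker and bumps the on-demand
// refcount; the copying itself is done asynchronously by do() above.
//
// Illustrative sketch (hypothetical, not part of this package) of a call site
// on the target's PUT path:
//
//	// xctn obtained by renewing kind apc.ActPutCopies via xreg
//	if lom.MirrorConf().Enabled {
//		xctn.(*XactPut).Repl(lom)
//	}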
func (r *XactPut) Repl(lom *core.LOM) {
	debug.Assert(!r.Finished(), r.String())

	// ref-count on-demand, decrement via worker.Callback = r.do
	r.IncPending()
	chanFull, err := r.workers.PostLIF(lom)
	if err != nil {
		r.DecPending()
		r.Abort(fmt.Errorf("%s: %v", r, err))
	}
	if chanFull {
		r.chanFull.Inc()
	}
}

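// waitPending keeps the xaction alive (IncPending/DecPending around each
// sleep interval) until the work channel drains, and logs a notice if that
// takes longer than `longtime`.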
func (r *XactPut) waitPending() {
	const minsleep, longtime = 4 * time.Second, 30 * time.Second
	var (
		started     int64
		cnt, iniCnt int
		sleep       = max(cmn.Rom.MaxKeepalive(), minsleep)
	)
	if cnt = len(r.workCh); cnt == 0 {
		return
	}
	started, iniCnt = mono.NanoTime(), cnt
	// keep sleeping until the very end
	for cnt > 0 {
		r.IncPending()
		time.Sleep(sleep)
		r.DecPending()
		cnt = len(r.workCh)
	}
	if d := mono.Since(started); d > longtime {
		nlog.Infof("%s: took a while to finish %d pending copies: %v", r, iniCnt, d)
	}
}

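// stop terminates the demand-base and the workers, drains whatever remains
// queued, adjusts the pending count accordingly, and reports dropped objects
// (if any) as an error; it also logs work-channel congestion observed during
// the run.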
func (r *XactPut) stop() (err error) {
	r.DemandBase.Stop()
	n := r.workers.Stop()
	if nn := drainWorkCh(r.workCh); nn > 0 {
		n += nn
	}
	if n > 0 {
		r.SubPending(n)
		err = fmt.Errorf("%s: dropped %d object%s", r, n, cos.Plural(n))
	}
	if cnt := r.chanFull.Load(); (cnt >= 10 && cnt <= 20) || (cnt > 0 && cmn.Rom.FastV(5, cos.SmoduleMirror)) {
		nlog.Errorln("work channel full (all mp workers)", r.String(), cnt)
	}
	return
}

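// Snap returns a point-in-time snapshot of the xaction's runtime statistics.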
func (r *XactPut) Snap() (snap *core.Snap) {
	snap = &core.Snap{}
	r.ToSnap(snap)

	snap.IdleX = r.IsIdle()
	return
}