github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ec/putxaction.go (about)

     1  // Package ec provides erasure coding (EC) based data protection for AIStore.
     2  /*
     3  * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package ec
     6  
     7  import (
     8  	"fmt"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/NVIDIA/aistore/api/apc"
    13  	"github.com/NVIDIA/aistore/cmn"
    14  	"github.com/NVIDIA/aistore/cmn/cos"
    15  	"github.com/NVIDIA/aistore/cmn/debug"
    16  	"github.com/NVIDIA/aistore/cmn/nlog"
    17  	"github.com/NVIDIA/aistore/core"
    18  	"github.com/NVIDIA/aistore/core/meta"
    19  	"github.com/NVIDIA/aistore/fs"
    20  	"github.com/NVIDIA/aistore/xact"
    21  	"github.com/NVIDIA/aistore/xact/xreg"
    22  )
    23  
    24  type (
    25  	putFactory struct {
    26  		xreg.RenewBase
    27  		xctn *XactPut
    28  	}
    29  	// Erasure coding runner: accepts requests and dispatches them to
    30  	// a correct mountpath runner. Runner uses dedicated to EC memory manager
    31  	// inherited by dependent mountpath runners
    32  	XactPut struct {
    33  		xactECBase
    34  		xactReqBase
    35  		putJoggers map[string]*putJogger // mountpath joggers for PUT/DEL
    36  	}
    37  	// extended x-ec-put statistics
    38  	ExtECPutStats struct {
    39  		AvgEncodeTime  cos.Duration `json:"ec.encode.ns"`
    40  		AvgDeleteTime  cos.Duration `json:"ec.delete.ns"`
    41  		EncodeCount    int64        `json:"ec.encode.n,string"`
    42  		DeleteCount    int64        `json:"ec.delete.n,string"`
    43  		EncodeSize     int64        `json:"ec.encode.size,string"`
    44  		EncodeErrCount int64        `json:"ec.encode.err.n,string"`
    45  		DeleteErrCount int64        `json:"ec.delete.err.n,string"`
    46  		AvgObjTime     cos.Duration `json:"ec.obj.process.ns"`
    47  		AvgQueueLen    float64      `json:"ec.queue.len.f"`
    48  		IsIdle         bool         `json:"is_idle"`
    49  	}
    50  )
    51  
    52  // interface guard
    53  var (
    54  	_ xact.Demand    = (*XactPut)(nil)
    55  	_ xreg.Renewable = (*putFactory)(nil)
    56  )
    57  
    58  ////////////////
    59  // putFactory //
    60  ////////////////
    61  
    62  func (*putFactory) New(_ xreg.Args, bck *meta.Bck) xreg.Renewable {
    63  	p := &putFactory{RenewBase: xreg.RenewBase{Bck: bck}}
    64  	return p
    65  }
    66  
    67  func (p *putFactory) Start() error {
    68  	xec := ECM.NewPutXact(p.Bck.Bucket())
    69  	xec.DemandBase.Init(cos.GenUUID(), p.Kind(), p.Bck, 0 /*use default*/)
    70  	p.xctn = xec
    71  	go xec.Run(nil)
    72  	return nil
    73  }
    74  
    75  func (*putFactory) Kind() string     { return apc.ActECPut }
    76  func (p *putFactory) Get() core.Xact { return p.xctn }
    77  
    78  func (p *putFactory) WhenPrevIsRunning(xprev xreg.Renewable) (xreg.WPR, error) {
    79  	debug.Assertf(false, "%s vs %s", p.Str(p.Kind()), xprev) // xreg.usePrev() must've returned true
    80  	return xreg.WprUse, nil
    81  }
    82  
    83  /////////////
    84  // XactPut //
    85  /////////////
    86  
    87  func newPutXact(bck *cmn.Bck, mgr *Manager) *XactPut {
    88  	var (
    89  		avail, disabled = fs.Get()
    90  		totalPaths      = len(avail) + len(disabled)
    91  		config          = cmn.GCO.Get()
    92  		xctn            = &XactPut{
    93  			putJoggers: make(map[string]*putJogger, totalPaths),
    94  		}
    95  	)
    96  	xctn.xactECBase.init(config, bck, mgr)
    97  	xctn.xactReqBase.init()
    98  
    99  	// create all runners but do not start them until Run is called
   100  	for mpath := range avail {
   101  		putJog := xctn.newPutJogger(mpath)
   102  		xctn.putJoggers[mpath] = putJog
   103  	}
   104  	for mpath := range disabled {
   105  		putJog := xctn.newPutJogger(mpath)
   106  		xctn.putJoggers[mpath] = putJog
   107  	}
   108  	return xctn
   109  }
   110  
   111  func (r *XactPut) newPutJogger(mpath string) *putJogger {
   112  	j := &putJogger{
   113  		parent: r,
   114  		mpath:  mpath,
   115  		putCh:  make(chan *request, requestBufSizeFS),
   116  		xactCh: make(chan *request, requestBufSizeEncode),
   117  	}
   118  	j.stopCh.Init()
   119  	return j
   120  }
   121  
   122  func (r *XactPut) dispatchRequest(req *request, lom *core.LOM) error {
   123  	debug.Assert(req.Action == ActDelete || req.Action == ActSplit, req.Action)
   124  	debug.Assert(req.ErrCh == nil, "ec-put does not support ErrCh")
   125  	if !r.ecRequestsEnabled() {
   126  		return ErrorECDisabled
   127  	}
   128  	switch req.Action {
   129  	case ActSplit:
   130  		r.stats.updateEncode(lom.SizeBytes())
   131  	case ActDelete:
   132  		r.stats.updateDelete()
   133  	default:
   134  		return fmt.Errorf("invalid request's action %s for putxaction", req.Action)
   135  	}
   136  
   137  	jogger, ok := r.putJoggers[lom.Mountpath().Path]
   138  	if !ok {
   139  		debug.Assert(false, "invalid "+lom.Mountpath().String())
   140  	}
   141  	if cmn.Rom.FastV(4, cos.SmoduleEC) {
   142  		nlog.Infof("ECPUT (bg queue = %d): dispatching object %s....", len(jogger.putCh), lom)
   143  	}
   144  	if req.rebuild {
   145  		jogger.xactCh <- req
   146  	} else {
   147  		r.stats.updateQueue(len(jogger.putCh))
   148  		jogger.putCh <- req
   149  	}
   150  	return nil
   151  }
   152  
   153  func (r *XactPut) Run(*sync.WaitGroup) {
   154  	nlog.Infoln(r.Name())
   155  
   156  	var wg sync.WaitGroup
   157  	for _, jog := range r.putJoggers {
   158  		wg.Add(1)
   159  		go jog.run(&wg)
   160  	}
   161  
   162  	ticker := time.NewTicker(r.config.Periodic.StatsTime.D())
   163  	r.mainLoop(ticker)
   164  	ticker.Stop()
   165  	wg.Wait()
   166  	// not closing stream bundles as they are shared across EC xactions
   167  	r.Finish()
   168  }
   169  
   170  // all requests are equal, throttle TODO
   171  func (r *XactPut) mainLoop(ticker *time.Ticker) {
   172  	for {
   173  		select {
   174  		case <-ticker.C:
   175  			if cmn.Rom.FastV(4, cos.SmoduleEC) {
   176  				if s := fmt.Sprintf("%v", r.Snap()); s != "" {
   177  					nlog.Infoln(s)
   178  				}
   179  			}
   180  		case <-r.IdleTimer():
   181  			// It's OK not to notify ecmanager, it'll just have stopped xctn in a map.
   182  			r.stop()
   183  			return
   184  		case msg := <-r.controlCh:
   185  			if msg.Action == ActEnableRequests {
   186  				r.setEcRequestsEnabled()
   187  				break
   188  			}
   189  			debug.Assert(msg.Action == ActClearRequests, msg.Action)
   190  
   191  			r.setEcRequestsDisabled()
   192  			r.stop()
   193  			return
   194  		case <-r.ChanAbort():
   195  			r.stop()
   196  			return
   197  		}
   198  	}
   199  }
   200  
   201  func (r *XactPut) Stop(err error) { r.Abort(err) }
   202  
   203  func (r *XactPut) stop() {
   204  	r.DemandBase.Stop()
   205  	for _, jog := range r.putJoggers {
   206  		jog.stop()
   207  	}
   208  
   209  	// Don't close bundles, they are shared between bucket's EC actions
   210  	r.Finish()
   211  }
   212  
   213  // Encode schedules FQN for erasure coding process
   214  func (r *XactPut) encode(req *request, lom *core.LOM) {
   215  	now := time.Now()
   216  	req.putTime, req.tm = now, now
   217  	if err := r.dispatchRequest(req, lom); err != nil {
   218  		nlog.Errorf("Failed to encode %s: %v", lom, err)
   219  		freeReq(req)
   220  	}
   221  }
   222  
   223  // Cleanup deletes all object slices or copies after the main object is removed
   224  func (r *XactPut) cleanup(req *request, lom *core.LOM) {
   225  	now := time.Now()
   226  	req.putTime, req.tm = now, now
   227  
   228  	if err := r.dispatchRequest(req, lom); err != nil {
   229  		nlog.Errorf("Failed to cleanup %s: %v", lom, err)
   230  		freeReq(req)
   231  	}
   232  }
   233  
   234  func (r *XactPut) Snap() (snap *core.Snap) {
   235  	snap = r.baseSnap()
   236  	st := r.stats.stats()
   237  	snap.Ext = &ExtECPutStats{
   238  		AvgEncodeTime:  cos.Duration(st.EncodeTime),
   239  		EncodeSize:     st.EncodeSize,
   240  		EncodeCount:    st.PutReq,
   241  		EncodeErrCount: st.EncodeErr,
   242  		AvgDeleteTime:  cos.Duration(st.DeleteTime),
   243  		DeleteErrCount: st.DeleteErr,
   244  		DeleteCount:    st.DelReq,
   245  		AvgObjTime:     cos.Duration(st.ObjTime),
   246  		AvgQueueLen:    st.QueueLen,
   247  		IsIdle:         r.Pending() == 0,
   248  	}
   249  
   250  	snap.Stats.Objs = st.PutReq + st.DelReq // TODO: support in and out
   251  	snap.Stats.Bytes = st.EncodeSize
   252  	return
   253  }