github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ec/respondxaction.go (about)

     1  // Package ec provides erasure coding (EC) based data protection for AIStore.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package ec
     6  
     7  import (
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/NVIDIA/aistore/api/apc"
    15  	"github.com/NVIDIA/aistore/cmn"
    16  	"github.com/NVIDIA/aistore/cmn/cos"
    17  	"github.com/NVIDIA/aistore/cmn/debug"
    18  	"github.com/NVIDIA/aistore/cmn/nlog"
    19  	"github.com/NVIDIA/aistore/core"
    20  	"github.com/NVIDIA/aistore/core/meta"
    21  	"github.com/NVIDIA/aistore/fs"
    22  	"github.com/NVIDIA/aistore/transport"
    23  	"github.com/NVIDIA/aistore/xact"
    24  	"github.com/NVIDIA/aistore/xact/xreg"
    25  )
    26  
type (
	// rspFactory creates/renews XactRespond instances via the xreg registry.
	rspFactory struct {
		xreg.RenewBase
		xctn *XactRespond // set by Start; returned by Get
	}
	// Xaction responsible for responding to EC requests of other targets.
	// Should not be stopped if number of known targets is small.
	XactRespond struct {
		xactECBase
	}
)

// interface guard
var (
	_ xact.Demand    = (*XactRespond)(nil)
	_ xreg.Renewable = (*rspFactory)(nil)
)
    44  
    45  ////////////////
    46  // rspFactory //
    47  ////////////////
    48  
    49  func (*rspFactory) New(_ xreg.Args, bck *meta.Bck) xreg.Renewable {
    50  	p := &rspFactory{RenewBase: xreg.RenewBase{Bck: bck}}
    51  	return p
    52  }
    53  
// Kind returns the xaction kind (apc.ActECRespond) this factory renews.
func (*rspFactory) Kind() string     { return apc.ActECRespond }

// Get returns the xaction created by the most recent Start (nil before Start).
func (p *rspFactory) Get() core.Xact { return p.xctn }
    56  
    57  func (p *rspFactory) WhenPrevIsRunning(xprev xreg.Renewable) (xreg.WPR, error) {
    58  	debug.Assertf(false, "%s vs %s", p.Str(p.Kind()), xprev) // xreg.usePrev() must've returned true
    59  	return xreg.WprUse, nil
    60  }
    61  
    62  func (p *rspFactory) Start() error {
    63  	xec := ECM.NewRespondXact(p.Bck.Bucket())
    64  	xec.DemandBase.Init(cos.GenUUID(), p.Kind(), p.Bck, 0 /*use default*/)
    65  	p.xctn = xec
    66  	go xec.Run(nil)
    67  	return nil
    68  }
    69  
    70  /////////////////
    71  // XactRespond //
    72  /////////////////
    73  
    74  func newRespondXact(bck *cmn.Bck, mgr *Manager) *XactRespond {
    75  	xctn := &XactRespond{}
    76  	xctn.xactECBase.init(cmn.GCO.Get(), bck, mgr)
    77  	return xctn
    78  }
    79  
    80  func (r *XactRespond) Run(*sync.WaitGroup) {
    81  	nlog.Infoln(r.Name())
    82  
    83  	ticker := time.NewTicker(r.config.Periodic.StatsTime.D())
    84  	defer ticker.Stop()
    85  
    86  	// as of now all requests are equal (TODO: throttle)
    87  	for {
    88  		select {
    89  		case <-ticker.C:
    90  			if s := r.stats.stats().String(); s != "" {
    91  				nlog.Infoln(s)
    92  			}
    93  		case <-r.IdleTimer():
    94  			r.stop()
    95  			return
    96  		case <-r.ChanAbort():
    97  			r.stop()
    98  			return
    99  		}
   100  	}
   101  }
   102  
   103  // Utility function to cleanup both object/slice and its meta on the local node
   104  // Used when processing object deletion request
   105  func (*XactRespond) removeObjAndMeta(bck *meta.Bck, objName string) error {
   106  	if cmn.Rom.FastV(4, cos.SmoduleEC) {
   107  		nlog.Infof("Delete request for %s", bck.Cname(objName))
   108  	}
   109  
   110  	ct, err := core.NewCTFromBO(bck.Bucket(), objName, core.T.Bowner(), fs.ECSliceType)
   111  	if err != nil {
   112  		return err
   113  	}
   114  	ct.Lock(true)
   115  	defer ct.Unlock(true)
   116  
   117  	// to be consistent with PUT, object's files are deleted in a reversed
   118  	// order: first Metafile is removed, then Replica/Slice
   119  	// Why: the main object is gone already, so we do not want any target
   120  	// responds that it has the object because it has metafile. We delete
   121  	// metafile that makes remained slices/replicas outdated and can be cleaned
   122  	// up later by LRU or other runner
   123  	for _, tp := range []string{fs.ECMetaType, fs.ObjectType, fs.ECSliceType} {
   124  		fqnMeta, _, err := core.HrwFQN(bck.Bucket(), tp, objName)
   125  		if err != nil {
   126  			return err
   127  		}
   128  		if err := os.Remove(fqnMeta); err != nil && !os.IsNotExist(err) {
   129  			return fmt.Errorf("error removing %s %q: %w", tp, fqnMeta, err)
   130  		}
   131  	}
   132  
   133  	return nil
   134  }
   135  
   136  func (r *XactRespond) trySendCT(iReq intraReq, hdr *transport.ObjHdr, bck *meta.Bck) error {
   137  	var (
   138  		fqn, metaFQN string
   139  		md           *Metadata
   140  		objName      = hdr.ObjName
   141  	)
   142  	if cmn.Rom.FastV(4, cos.SmoduleEC) {
   143  		nlog.Infof("Received request for slice %d of %s", iReq.meta.SliceID, objName)
   144  	}
   145  	if iReq.isSlice {
   146  		ct, err := core.NewCTFromBO(bck.Bucket(), objName, core.T.Bowner(), fs.ECSliceType)
   147  		if err != nil {
   148  			return err
   149  		}
   150  		ct.Lock(false)
   151  		defer ct.Unlock(false)
   152  		fqn = ct.FQN()
   153  		metaFQN = ct.Make(fs.ECMetaType)
   154  		if md, err = LoadMetadata(metaFQN); err != nil {
   155  			return err
   156  		}
   157  	}
   158  
   159  	return r.dataResponse(respPut, hdr, fqn, bck, objName, md)
   160  }
   161  
   162  // DispatchReq is responsible for handling request from other targets
   163  func (r *XactRespond) DispatchReq(iReq intraReq, hdr *transport.ObjHdr, bck *meta.Bck) {
   164  	switch hdr.Opcode {
   165  	case reqDel:
   166  		// object cleanup request: delete replicas, slices and metafiles
   167  		if err := r.removeObjAndMeta(bck, hdr.ObjName); err != nil {
   168  			err = cmn.NewErrFailedTo(core.T, "delete", bck.Cname(hdr.ObjName), err)
   169  			r.AddErr(err, 0)
   170  		}
   171  	case reqGet:
   172  		err := r.trySendCT(iReq, hdr, bck)
   173  		if err != nil {
   174  			r.AddErr(err, 0)
   175  		}
   176  	default:
   177  		debug.Assert(false, "opcode", hdr.Opcode)
   178  		nlog.Errorf("Invalid request type %d", hdr.Opcode)
   179  	}
   180  }
   181  
   182  func (r *XactRespond) DispatchResp(iReq intraReq, hdr *transport.ObjHdr, object io.Reader) {
   183  	r.IncPending()
   184  	defer r.DecPending() // no async operation, so DecPending is deferred
   185  	switch hdr.Opcode {
   186  	case reqPut:
   187  		// a remote target sent a replica/slice while it was
   188  		// encoding or restoring an object. In this case it just saves
   189  		// the sent replica or slice to a local file along with its metadata
   190  
   191  		// Check if the request is valid: it must contain metadata
   192  		var (
   193  			err  error
   194  			meta = iReq.meta
   195  		)
   196  		if meta == nil {
   197  			nlog.Errorf("%s: no metadata for %s", core.T, hdr.Cname())
   198  			return
   199  		}
   200  
   201  		if cmn.Rom.FastV(4, cos.SmoduleEC) {
   202  			nlog.Infof("Got slice=%t from %s (#%d of %s) v%s, cksum: %s", iReq.isSlice, hdr.SID,
   203  				iReq.meta.SliceID, hdr.Cname(), meta.ObjVersion, meta.CksumValue)
   204  		}
   205  		md := meta.NewPack()
   206  		if iReq.isSlice {
   207  			args := &WriteArgs{Reader: object, MD: md, BID: iReq.bid, Generation: meta.Generation, Xact: r}
   208  			err = WriteSliceAndMeta(hdr, args)
   209  		} else {
   210  			var lom *core.LOM
   211  			lom, err = core.AllocLomFromHdr(hdr)
   212  			if err == nil {
   213  				args := &WriteArgs{
   214  					Reader:     object,
   215  					MD:         md,
   216  					Cksum:      hdr.ObjAttrs.Cksum,
   217  					BID:        iReq.bid,
   218  					Generation: meta.Generation,
   219  					Xact:       r,
   220  				}
   221  				err = WriteReplicaAndMeta(lom, args)
   222  			}
   223  			core.FreeLOM(lom)
   224  		}
   225  		if err != nil {
   226  			r.AddErr(err, 0)
   227  			return
   228  		}
   229  		r.ObjsAdd(1, hdr.ObjAttrs.Size)
   230  	default:
   231  		debug.Assert(false, "opcode", hdr.Opcode)
   232  		nlog.Errorf("Invalid request type: %d", hdr.Opcode)
   233  	}
   234  }
   235  
// Stop aborts the xaction with the given error (external termination).
func (r *XactRespond) Stop(err error) { r.Abort(err) }

// stop is the internal termination path (idle timeout or abort):
// stops the demand base and finalizes the xaction.
func (r *XactRespond) stop() {
	r.DemandBase.Stop()
	r.Finish()
}

// (compare w/ XactGet/Put)
func (r *XactRespond) Snap() *core.Snap { return r.baseSnap() }