github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ec/manager.go (about)

     1  // Package ec provides erasure coding (EC) based data protection for AIStore.
     2  /*
     3   * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package ec
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	ratomic "sync/atomic"
    12  
    13  	"github.com/NVIDIA/aistore/api/apc"
    14  	"github.com/NVIDIA/aistore/cmn"
    15  	"github.com/NVIDIA/aistore/cmn/atomic"
    16  	"github.com/NVIDIA/aistore/cmn/cos"
    17  	"github.com/NVIDIA/aistore/cmn/debug"
    18  	"github.com/NVIDIA/aistore/cmn/nlog"
    19  	"github.com/NVIDIA/aistore/core"
    20  	"github.com/NVIDIA/aistore/core/meta"
    21  	"github.com/NVIDIA/aistore/fs"
    22  	"github.com/NVIDIA/aistore/transport"
    23  	"github.com/NVIDIA/aistore/transport/bundle"
    24  	"github.com/NVIDIA/aistore/xact/xreg"
    25  )
    26  
    27  type Manager struct {
    28  	bmd *meta.BMD
    29  
    30  	netReq  string // network used to send object request
    31  	netResp string // network used to send/receive slices
    32  
    33  	// streams
    34  	reqBundle  ratomic.Pointer[bundle.Streams]
    35  	respBundle ratomic.Pointer[bundle.Streams]
    36  
    37  	bundleEnabled atomic.Bool // to disable and enable on the fly
    38  }
    39  
    40  var (
    41  	ECM        *Manager
    42  	errSkipped = errors.New("skipped") // CT is skipped due to EC unsupported for the content type
    43  )
    44  
    45  func initManager() (err error) {
    46  	ECM = &Manager{
    47  		netReq:  cmn.NetIntraControl,
    48  		netResp: cmn.NetIntraData,
    49  		bmd:     core.T.Bowner().Get(),
    50  	}
    51  	if ECM.bmd.IsECUsed() {
    52  		err = ECM.initECBundles()
    53  	}
    54  	return err
    55  }
    56  
    57  func (mgr *Manager) req() *bundle.Streams  { return mgr.reqBundle.Load() }
    58  func (mgr *Manager) resp() *bundle.Streams { return mgr.respBundle.Load() }
    59  
    60  func (mgr *Manager) initECBundles() error {
    61  	if !mgr.bundleEnabled.CAS(false, true) {
    62  		return nil
    63  	}
    64  	if err := transport.Handle(ReqStreamName, ECM.recvRequest); err != nil {
    65  		return fmt.Errorf("failed to register recvRequest: %v", err)
    66  	}
    67  	if err := transport.Handle(RespStreamName, ECM.recvResponse); err != nil {
    68  		return fmt.Errorf("failed to register respResponse: %v", err)
    69  	}
    70  	cbReq := func(hdr *transport.ObjHdr, _ io.ReadCloser, _ any, err error) {
    71  		if err != nil {
    72  			nlog.Errorf("failed to request %s: %v", hdr.Cname(), err)
    73  		}
    74  	}
    75  	var (
    76  		client      = transport.NewIntraDataClient()
    77  		config      = cmn.GCO.Get()
    78  		compression = config.EC.Compression
    79  		extraReq    = transport.Extra{Callback: cbReq, Compression: compression, Config: config}
    80  	)
    81  	reqSbArgs := bundle.Args{
    82  		Multiplier: config.EC.SbundleMult,
    83  		Extra:      &extraReq,
    84  		Net:        mgr.netReq,
    85  		Trname:     ReqStreamName,
    86  	}
    87  	respSbArgs := bundle.Args{
    88  		Multiplier: config.EC.SbundleMult,
    89  		Trname:     RespStreamName,
    90  		Net:        mgr.netResp,
    91  		Extra:      &transport.Extra{Compression: compression, Config: config},
    92  	}
    93  
    94  	mgr.reqBundle.Store(bundle.New(client, reqSbArgs))
    95  	mgr.respBundle.Store(bundle.New(client, respSbArgs))
    96  
    97  	return nil
    98  }
    99  
   100  func (mgr *Manager) closeECBundles() {
   101  	if !mgr.bundleEnabled.CAS(true, false) {
   102  		return
   103  	}
   104  	mgr.req().Close(false)
   105  	mgr.resp().Close(false)
   106  	transport.Unhandle(ReqStreamName)
   107  	transport.Unhandle(RespStreamName)
   108  }
   109  
   110  func (mgr *Manager) NewGetXact(bck *cmn.Bck) *XactGet         { return newGetXact(bck, mgr) }
   111  func (mgr *Manager) NewPutXact(bck *cmn.Bck) *XactPut         { return newPutXact(bck, mgr) }
   112  func (mgr *Manager) NewRespondXact(bck *cmn.Bck) *XactRespond { return newRespondXact(bck, mgr) }
   113  
   114  func (*Manager) RestoreBckGetXact(bck *meta.Bck) *XactGet {
   115  	xctn, err := _renewXact(bck, apc.ActECGet)
   116  	debug.AssertNoErr(err) // TODO: handle, here and elsewhere
   117  	return xctn.(*XactGet)
   118  }
   119  
   120  func (*Manager) RestoreBckPutXact(bck *meta.Bck) *XactPut {
   121  	xctn, err := _renewXact(bck, apc.ActECPut)
   122  	debug.AssertNoErr(err)
   123  	return xctn.(*XactPut)
   124  }
   125  
   126  func (*Manager) RestoreBckRespXact(bck *meta.Bck) *XactRespond {
   127  	xctn, err := _renewXact(bck, apc.ActECRespond)
   128  	debug.AssertNoErr(err)
   129  	return xctn.(*XactRespond)
   130  }
   131  
   132  func _renewXact(bck *meta.Bck, kind string) (core.Xact, error) {
   133  	rns := xreg.RenewBucketXact(kind, bck, xreg.Args{})
   134  	if rns.Err != nil {
   135  		return nil, rns.Err
   136  	}
   137  	return rns.Entry.Get(), nil
   138  }
   139  
   140  // A function to process command requests from other targets
   141  func (mgr *Manager) recvRequest(hdr *transport.ObjHdr, objReader io.Reader, err error) error {
   142  	defer transport.FreeRecv(objReader)
   143  	if err != nil {
   144  		nlog.Errorf("request failed: %v", err)
   145  		return err
   146  	}
   147  	// check if the header contains a valid request
   148  	if len(hdr.Opaque) == 0 {
   149  		err := fmt.Errorf("invalid header: [%+v]", hdr)
   150  		nlog.Errorln(err)
   151  		return err
   152  	}
   153  
   154  	unpacker := cos.NewUnpacker(hdr.Opaque)
   155  	iReq := intraReq{}
   156  	if err := unpacker.ReadAny(&iReq); err != nil {
   157  		nlog.Errorf("failed to unmarshal request: %v", err)
   158  		return err
   159  	}
   160  
   161  	// command requests should not have a body, but if it has,
   162  	// the body must be drained to avoid errors
   163  	if hdr.ObjAttrs.Size != 0 {
   164  		if _, err := io.ReadAll(objReader); err != nil {
   165  			nlog.Errorf("failed to read request body: %v", err)
   166  			return err
   167  		}
   168  	}
   169  	bck := meta.CloneBck(&hdr.Bck)
   170  	if err = bck.Init(core.T.Bowner()); err != nil {
   171  		if _, ok := err.(*cmn.ErrRemoteBckNotFound); !ok { // is ais
   172  			nlog.Errorf("failed to init bucket %s: %v", bck, err)
   173  			return err
   174  		}
   175  	}
   176  	mgr.RestoreBckRespXact(bck).DispatchReq(iReq, hdr, bck)
   177  	return nil
   178  }
   179  
   180  // A function to process big chunks of data (replica/slice/meta) sent from other targets
   181  func (mgr *Manager) recvResponse(hdr *transport.ObjHdr, objReader io.Reader, err error) error {
   182  	defer transport.DrainAndFreeReader(objReader)
   183  	if err != nil {
   184  		nlog.Errorln("failed to receive response:", err)
   185  		return err
   186  	}
   187  	// check if the request is valid
   188  	if len(hdr.Opaque) == 0 {
   189  		err := fmt.Errorf("invalid header: [%+v]", hdr)
   190  		nlog.Errorln(err)
   191  		return err
   192  	}
   193  
   194  	unpacker := cos.NewUnpacker(hdr.Opaque)
   195  	iReq := intraReq{}
   196  	if err := unpacker.ReadAny(&iReq); err != nil {
   197  		nlog.Errorln("failed to unpack request:", err)
   198  		return err
   199  	}
   200  	bck := meta.CloneBck(&hdr.Bck)
   201  	if err = bck.Init(core.T.Bowner()); err != nil {
   202  		if _, ok := err.(*cmn.ErrRemoteBckNotFound); !ok { // is ais
   203  			nlog.Errorln(err)
   204  			return err
   205  		}
   206  	}
   207  	switch hdr.Opcode {
   208  	case reqPut:
   209  		mgr.RestoreBckRespXact(bck).DispatchResp(iReq, hdr, objReader)
   210  	case respPut:
   211  		// Process the request even if the number of targets is insufficient
   212  		// (might've started when we had enough)
   213  		mgr.RestoreBckGetXact(bck).DispatchResp(iReq, hdr, bck, objReader)
   214  	default:
   215  		debug.Assertf(false, "unknown EC response action %d", hdr.Opcode)
   216  	}
   217  	return nil
   218  }
   219  
   220  // EncodeObject generates slices using Reed-Solom algorithm:
   221  //   - lom - object to encode
   222  //   - intra - if true, it is internal request and has low priority
   223  //   - cb - optional callback that is called after the object is encoded
   224  func (mgr *Manager) EncodeObject(lom *core.LOM, cb core.OnFinishObj) error {
   225  	if !lom.ECEnabled() {
   226  		return ErrorECDisabled
   227  	}
   228  	cs := fs.Cap()
   229  	if err := cs.Err(); err != nil {
   230  		return err
   231  	}
   232  	spec, _ := fs.CSM.FileSpec(lom.FQN)
   233  	if spec != nil && !spec.PermToProcess() {
   234  		return errSkipped
   235  	}
   236  
   237  	req := allocateReq(ActSplit, lom.LIF())
   238  	req.IsCopy = IsECCopy(lom.SizeBytes(), &lom.Bprops().EC)
   239  	if cb != nil {
   240  		req.rebuild = true
   241  		req.Callback = cb
   242  	}
   243  
   244  	mgr.RestoreBckPutXact(lom.Bck()).encode(req, lom)
   245  
   246  	return nil
   247  }
   248  
   249  func (mgr *Manager) CleanupObject(lom *core.LOM) {
   250  	if !lom.ECEnabled() {
   251  		return
   252  	}
   253  	debug.Assert(lom.FQN != "" && lom.Mountpath().Path != "")
   254  	req := allocateReq(ActDelete, lom.LIF())
   255  	mgr.RestoreBckPutXact(lom.Bck()).cleanup(req, lom)
   256  }
   257  
   258  func (mgr *Manager) RestoreObject(lom *core.LOM) error {
   259  	if !lom.ECEnabled() {
   260  		return ErrorECDisabled
   261  	}
   262  	cs := fs.Cap()
   263  	if err := cs.Err(); err != nil {
   264  		return err
   265  	}
   266  
   267  	debug.Assert(lom.Mountpath() != nil && lom.Mountpath().Path != "")
   268  	req := allocateReq(ActRestore, lom.LIF())
   269  	errCh := make(chan error) // unbuffered
   270  	req.ErrCh = errCh
   271  	mgr.RestoreBckGetXact(lom.Bck()).decode(req, lom)
   272  
   273  	// wait for EC completes restoring the object
   274  	return <-errCh
   275  }
   276  
   277  // disableBck starts to reject new EC requests, rejects pending ones
   278  func (mgr *Manager) disableBck(bck *meta.Bck) {
   279  	mgr.RestoreBckGetXact(bck).ClearRequests()
   280  	mgr.RestoreBckPutXact(bck).ClearRequests()
   281  }
   282  
   283  // enableBck aborts xctn disable and starts to accept new EC requests
   284  // enableBck uses the same channel as disableBck, so order of executing them is the same as
   285  // order which they arrived to a target in
   286  func (mgr *Manager) enableBck(bck *meta.Bck) {
   287  	mgr.RestoreBckGetXact(bck).EnableRequests()
   288  	mgr.RestoreBckPutXact(bck).EnableRequests()
   289  }
   290  
   291  func (mgr *Manager) BMDChanged() error {
   292  	newBMD := core.T.Bowner().Get()
   293  	oldBMD := mgr.bmd
   294  	if newBMD.Version <= mgr.bmd.Version {
   295  		return nil
   296  	}
   297  	mgr.bmd = newBMD
   298  
   299  	// globally
   300  	if newBMD.IsECUsed() && !oldBMD.IsECUsed() {
   301  		if err := mgr.initECBundles(); err != nil {
   302  			return err
   303  		}
   304  	} else if !newBMD.IsECUsed() && oldBMD.IsECUsed() {
   305  		mgr.closeECBundles()
   306  		return nil
   307  	}
   308  
   309  	// by bucket
   310  	newBMD.Range(nil, nil, func(nbck *meta.Bck) bool {
   311  		oprops, ok := oldBMD.Get(nbck)
   312  		if !ok {
   313  			if nbck.Props.EC.Enabled {
   314  				mgr.enableBck(nbck)
   315  			}
   316  			return false
   317  		}
   318  		if !oprops.EC.Enabled && nbck.Props.EC.Enabled {
   319  			mgr.enableBck(nbck)
   320  		} else if oprops.EC.Enabled && !nbck.Props.EC.Enabled {
   321  			mgr.disableBck(nbck)
   322  		}
   323  
   324  		return false
   325  	})
   326  	return nil
   327  }