github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ec/bckencodexact.go (about)

     1  // Package ec provides erasure coding (EC) based data protection for AIStore.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package ec
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  	"sync"
    11  
    12  	"github.com/NVIDIA/aistore/api/apc"
    13  	"github.com/NVIDIA/aistore/cmn"
    14  	"github.com/NVIDIA/aistore/cmn/cos"
    15  	"github.com/NVIDIA/aistore/cmn/nlog"
    16  	"github.com/NVIDIA/aistore/core"
    17  	"github.com/NVIDIA/aistore/core/meta"
    18  	"github.com/NVIDIA/aistore/fs"
    19  	"github.com/NVIDIA/aistore/fs/mpather"
    20  	"github.com/NVIDIA/aistore/xact"
    21  	"github.com/NVIDIA/aistore/xact/xreg"
    22  )
    23  
    24  type (
    25  	encFactory struct {
    26  		xreg.RenewBase
    27  		xctn  *XactBckEncode
    28  		phase string
    29  	}
    30  	XactBckEncode struct {
    31  		xact.Base
    32  		bck  *meta.Bck
    33  		wg   *sync.WaitGroup // to wait for EC finishes all objects
    34  		smap *meta.Smap
    35  	}
    36  )
    37  
    38  // interface guard
    39  var (
    40  	_ core.Xact      = (*XactBckEncode)(nil)
    41  	_ xreg.Renewable = (*encFactory)(nil)
    42  )
    43  
    44  ////////////////
    45  // encFactory //
    46  ////////////////
    47  
    48  func (*encFactory) New(args xreg.Args, bck *meta.Bck) xreg.Renewable {
    49  	custom := args.Custom.(*xreg.ECEncodeArgs)
    50  	p := &encFactory{RenewBase: xreg.RenewBase{Args: args, Bck: bck}, phase: custom.Phase}
    51  	return p
    52  }
    53  
    54  func (p *encFactory) Start() error {
    55  	p.xctn = newXactBckEncode(p.Bck, p.UUID())
    56  	return nil
    57  }
    58  
    59  func (*encFactory) Kind() string     { return apc.ActECEncode }
    60  func (p *encFactory) Get() core.Xact { return p.xctn }
    61  
    62  func (p *encFactory) WhenPrevIsRunning(prevEntry xreg.Renewable) (wpr xreg.WPR, err error) {
    63  	prev := prevEntry.(*encFactory)
    64  	if prev.phase == apc.ActBegin && p.phase == apc.ActCommit {
    65  		prev.phase = apc.ActCommit // transition
    66  		wpr = xreg.WprUse
    67  		return
    68  	}
    69  	err = fmt.Errorf("%s(%s, phase %s): cannot %s", p.Kind(), prev.xctn.Bck().Name, prev.phase, p.phase)
    70  	return
    71  }
    72  
    73  ///////////////////
    74  // XactBckEncode //
    75  ///////////////////
    76  
    77  func newXactBckEncode(bck *meta.Bck, uuid string) (r *XactBckEncode) {
    78  	r = &XactBckEncode{bck: bck, wg: &sync.WaitGroup{}, smap: core.T.Sowner().Get()}
    79  	r.InitBase(uuid, apc.ActECEncode, bck)
    80  	return
    81  }
    82  
    83  func (r *XactBckEncode) Run(wg *sync.WaitGroup) {
    84  	wg.Done()
    85  	bck := r.bck
    86  	if err := bck.Init(core.T.Bowner()); err != nil {
    87  		r.AddErr(err)
    88  		r.Finish()
    89  		return
    90  	}
    91  	if !bck.Props.EC.Enabled {
    92  		r.AddErr(fmt.Errorf("%s does not have EC enabled", r.bck.Cname("")))
    93  		r.Finish()
    94  		return
    95  	}
    96  
    97  	opts := &mpather.JgroupOpts{
    98  		CTs:      []string{fs.ObjectType},
    99  		VisitObj: r.bckEncode,
   100  		DoLoad:   mpather.LoadUnsafe,
   101  	}
   102  	opts.Bck.Copy(r.bck.Bucket())
   103  	jg := mpather.NewJoggerGroup(opts, cmn.GCO.Get(), "")
   104  	jg.Run()
   105  
   106  	select {
   107  	case <-r.ChanAbort():
   108  		jg.Stop()
   109  	case <-jg.ListenFinished():
   110  		err := jg.Stop()
   111  		if err != nil {
   112  			r.AddErr(err)
   113  		}
   114  	}
   115  	r.wg.Wait() // Need to wait for all async actions to finish.
   116  
   117  	r.Finish()
   118  }
   119  
   120  func (r *XactBckEncode) beforeECObj() { r.wg.Add(1) }
   121  
   122  func (r *XactBckEncode) afterECObj(lom *core.LOM, err error) {
   123  	if err == nil {
   124  		r.LomAdd(lom)
   125  	} else if err != errSkipped {
   126  		nlog.Errorf("Failed to erasure-code %s: %v", lom.Cname(), err)
   127  	}
   128  
   129  	r.wg.Done()
   130  }
   131  
   132  // Walks through all files in 'obj' directory, and calls EC.Encode for every
   133  // file whose HRW points to this file and the file does not have corresponding
   134  // metadata file in 'meta' directory
   135  func (r *XactBckEncode) bckEncode(lom *core.LOM, _ []byte) error {
   136  	_, local, err := lom.HrwTarget(r.smap)
   137  	if err != nil {
   138  		nlog.Errorf("%s: %s", lom, err)
   139  		return nil
   140  	}
   141  	// An object replica - skip EC.
   142  	if !local {
   143  		return nil
   144  	}
   145  	mdFQN, _, err := core.HrwFQN(lom.Bck().Bucket(), fs.ECMetaType, lom.ObjName)
   146  	if err != nil {
   147  		nlog.Warningf("metadata FQN generation failed %q: %v", lom, err)
   148  		return nil
   149  	}
   150  	err = cos.Stat(mdFQN)
   151  	// Metadata file exists - the object was already EC'ed before.
   152  	if err == nil {
   153  		return nil
   154  	}
   155  	if !os.IsNotExist(err) {
   156  		nlog.Warningf("failed to stat %q: %v", mdFQN, err)
   157  		return nil
   158  	}
   159  
   160  	// beforeECObj increases a counter, and callback afterECObj decreases it.
   161  	// After Walk finishes, the xaction waits until counter drops to zero.
   162  	// That means all objects have been processed and xaction can finalize.
   163  	r.beforeECObj()
   164  	if err = ECM.EncodeObject(lom, r.afterECObj); err != nil {
   165  		// something went wrong: abort xaction
   166  		r.afterECObj(lom, err)
   167  		if err != errSkipped {
   168  			return err
   169  		}
   170  	}
   171  	return nil
   172  }
   173  
   174  func (r *XactBckEncode) Snap() (snap *core.Snap) {
   175  	snap = &core.Snap{}
   176  	r.ToSnap(snap)
   177  
   178  	snap.IdleX = r.IsIdle()
   179  	return
   180  }