github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ec/bckencodexact.go (about) 1 // Package ec provides erasure coding (EC) based data protection for AIStore. 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package ec 6 7 import ( 8 "fmt" 9 "os" 10 "sync" 11 12 "github.com/NVIDIA/aistore/api/apc" 13 "github.com/NVIDIA/aistore/cmn" 14 "github.com/NVIDIA/aistore/cmn/cos" 15 "github.com/NVIDIA/aistore/cmn/nlog" 16 "github.com/NVIDIA/aistore/core" 17 "github.com/NVIDIA/aistore/core/meta" 18 "github.com/NVIDIA/aistore/fs" 19 "github.com/NVIDIA/aistore/fs/mpather" 20 "github.com/NVIDIA/aistore/xact" 21 "github.com/NVIDIA/aistore/xact/xreg" 22 ) 23 24 type ( 25 encFactory struct { 26 xreg.RenewBase 27 xctn *XactBckEncode 28 phase string 29 } 30 XactBckEncode struct { 31 xact.Base 32 bck *meta.Bck 33 wg *sync.WaitGroup // to wait for EC finishes all objects 34 smap *meta.Smap 35 } 36 ) 37 38 // interface guard 39 var ( 40 _ core.Xact = (*XactBckEncode)(nil) 41 _ xreg.Renewable = (*encFactory)(nil) 42 ) 43 44 //////////////// 45 // encFactory // 46 //////////////// 47 48 func (*encFactory) New(args xreg.Args, bck *meta.Bck) xreg.Renewable { 49 custom := args.Custom.(*xreg.ECEncodeArgs) 50 p := &encFactory{RenewBase: xreg.RenewBase{Args: args, Bck: bck}, phase: custom.Phase} 51 return p 52 } 53 54 func (p *encFactory) Start() error { 55 p.xctn = newXactBckEncode(p.Bck, p.UUID()) 56 return nil 57 } 58 59 func (*encFactory) Kind() string { return apc.ActECEncode } 60 func (p *encFactory) Get() core.Xact { return p.xctn } 61 62 func (p *encFactory) WhenPrevIsRunning(prevEntry xreg.Renewable) (wpr xreg.WPR, err error) { 63 prev := prevEntry.(*encFactory) 64 if prev.phase == apc.ActBegin && p.phase == apc.ActCommit { 65 prev.phase = apc.ActCommit // transition 66 wpr = xreg.WprUse 67 return 68 } 69 err = fmt.Errorf("%s(%s, phase %s): cannot %s", p.Kind(), prev.xctn.Bck().Name, prev.phase, p.phase) 70 return 71 } 72 73 /////////////////// 74 // XactBckEncode // 75 /////////////////// 76 77 func newXactBckEncode(bck *meta.Bck, uuid string) (r *XactBckEncode) { 78 r = &XactBckEncode{bck: bck, wg: &sync.WaitGroup{}, smap: core.T.Sowner().Get()} 79 r.InitBase(uuid, apc.ActECEncode, bck) 80 return 81 } 82 83 func (r *XactBckEncode) Run(wg *sync.WaitGroup) { 84 wg.Done() 85 bck := r.bck 86 if err := bck.Init(core.T.Bowner()); err != nil { 87 r.AddErr(err) 88 r.Finish() 89 return 90 } 91 if !bck.Props.EC.Enabled { 92 r.AddErr(fmt.Errorf("%s does not have EC enabled", r.bck.Cname(""))) 93 r.Finish() 94 return 95 } 96 97 opts := &mpather.JgroupOpts{ 98 CTs: []string{fs.ObjectType}, 99 VisitObj: r.bckEncode, 100 DoLoad: mpather.LoadUnsafe, 101 } 102 opts.Bck.Copy(r.bck.Bucket()) 103 jg := mpather.NewJoggerGroup(opts, cmn.GCO.Get(), "") 104 jg.Run() 105 106 select { 107 case <-r.ChanAbort(): 108 jg.Stop() 109 case <-jg.ListenFinished(): 110 err := jg.Stop() 111 if err != nil { 112 r.AddErr(err) 113 } 114 } 115 r.wg.Wait() // Need to wait for all async actions to finish. 116 117 r.Finish() 118 } 119 120 func (r *XactBckEncode) beforeECObj() { r.wg.Add(1) } 121 122 func (r *XactBckEncode) afterECObj(lom *core.LOM, err error) { 123 if err == nil { 124 r.LomAdd(lom) 125 } else if err != errSkipped { 126 nlog.Errorf("Failed to erasure-code %s: %v", lom.Cname(), err) 127 } 128 129 r.wg.Done() 130 } 131 132 // Walks through all files in 'obj' directory, and calls EC.Encode for every 133 // file whose HRW points to this file and the file does not have corresponding 134 // metadata file in 'meta' directory 135 func (r *XactBckEncode) bckEncode(lom *core.LOM, _ []byte) error { 136 _, local, err := lom.HrwTarget(r.smap) 137 if err != nil { 138 nlog.Errorf("%s: %s", lom, err) 139 return nil 140 } 141 // An object replica - skip EC. 142 if !local { 143 return nil 144 } 145 mdFQN, _, err := core.HrwFQN(lom.Bck().Bucket(), fs.ECMetaType, lom.ObjName) 146 if err != nil { 147 nlog.Warningf("metadata FQN generation failed %q: %v", lom, err) 148 return nil 149 } 150 err = cos.Stat(mdFQN) 151 // Metadata file exists - the object was already EC'ed before. 152 if err == nil { 153 return nil 154 } 155 if !os.IsNotExist(err) { 156 nlog.Warningf("failed to stat %q: %v", mdFQN, err) 157 return nil 158 } 159 160 // beforeECObj increases a counter, and callback afterECObj decreases it. 161 // After Walk finishes, the xaction waits until counter drops to zero. 162 // That means all objects have been processed and xaction can finalize. 163 r.beforeECObj() 164 if err = ECM.EncodeObject(lom, r.afterECObj); err != nil { 165 // something went wrong: abort xaction 166 r.afterECObj(lom, err) 167 if err != errSkipped { 168 return err 169 } 170 } 171 return nil 172 } 173 174 func (r *XactBckEncode) Snap() (snap *core.Snap) { 175 snap = &core.Snap{} 176 r.ToSnap(snap) 177 178 snap.IdleX = r.IsIdle() 179 return 180 }