github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ec/putxaction.go (about) 1 // Package ec provides erasure coding (EC) based data protection for AIStore. 2 /* 3 * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package ec 6 7 import ( 8 "fmt" 9 "sync" 10 "time" 11 12 "github.com/NVIDIA/aistore/api/apc" 13 "github.com/NVIDIA/aistore/cmn" 14 "github.com/NVIDIA/aistore/cmn/cos" 15 "github.com/NVIDIA/aistore/cmn/debug" 16 "github.com/NVIDIA/aistore/cmn/nlog" 17 "github.com/NVIDIA/aistore/core" 18 "github.com/NVIDIA/aistore/core/meta" 19 "github.com/NVIDIA/aistore/fs" 20 "github.com/NVIDIA/aistore/xact" 21 "github.com/NVIDIA/aistore/xact/xreg" 22 ) 23 24 type ( 25 putFactory struct { 26 xreg.RenewBase 27 xctn *XactPut 28 } 29 // Erasure coding runner: accepts requests and dispatches them to 30 // a correct mountpath runner. Runner uses dedicated to EC memory manager 31 // inherited by dependent mountpath runners 32 XactPut struct { 33 xactECBase 34 xactReqBase 35 putJoggers map[string]*putJogger // mountpath joggers for PUT/DEL 36 } 37 // extended x-ec-put statistics 38 ExtECPutStats struct { 39 AvgEncodeTime cos.Duration `json:"ec.encode.ns"` 40 AvgDeleteTime cos.Duration `json:"ec.delete.ns"` 41 EncodeCount int64 `json:"ec.encode.n,string"` 42 DeleteCount int64 `json:"ec.delete.n,string"` 43 EncodeSize int64 `json:"ec.encode.size,string"` 44 EncodeErrCount int64 `json:"ec.encode.err.n,string"` 45 DeleteErrCount int64 `json:"ec.delete.err.n,string"` 46 AvgObjTime cos.Duration `json:"ec.obj.process.ns"` 47 AvgQueueLen float64 `json:"ec.queue.len.f"` 48 IsIdle bool `json:"is_idle"` 49 } 50 ) 51 52 // interface guard 53 var ( 54 _ xact.Demand = (*XactPut)(nil) 55 _ xreg.Renewable = (*putFactory)(nil) 56 ) 57 58 //////////////// 59 // putFactory // 60 //////////////// 61 62 func (*putFactory) New(_ xreg.Args, bck *meta.Bck) xreg.Renewable { 63 p := &putFactory{RenewBase: xreg.RenewBase{Bck: bck}} 64 return p 65 } 66 67 func (p *putFactory) Start() error { 68 xec := ECM.NewPutXact(p.Bck.Bucket()) 69 xec.DemandBase.Init(cos.GenUUID(), p.Kind(), p.Bck, 0 /*use default*/) 70 p.xctn = xec 71 go xec.Run(nil) 72 return nil 73 } 74 75 func (*putFactory) Kind() string { return apc.ActECPut } 76 func (p *putFactory) Get() core.Xact { return p.xctn } 77 78 func (p *putFactory) WhenPrevIsRunning(xprev xreg.Renewable) (xreg.WPR, error) { 79 debug.Assertf(false, "%s vs %s", p.Str(p.Kind()), xprev) // xreg.usePrev() must've returned true 80 return xreg.WprUse, nil 81 } 82 83 ///////////// 84 // XactPut // 85 ///////////// 86 87 func newPutXact(bck *cmn.Bck, mgr *Manager) *XactPut { 88 var ( 89 avail, disabled = fs.Get() 90 totalPaths = len(avail) + len(disabled) 91 config = cmn.GCO.Get() 92 xctn = &XactPut{ 93 putJoggers: make(map[string]*putJogger, totalPaths), 94 } 95 ) 96 xctn.xactECBase.init(config, bck, mgr) 97 xctn.xactReqBase.init() 98 99 // create all runners but do not start them until Run is called 100 for mpath := range avail { 101 putJog := xctn.newPutJogger(mpath) 102 xctn.putJoggers[mpath] = putJog 103 } 104 for mpath := range disabled { 105 putJog := xctn.newPutJogger(mpath) 106 xctn.putJoggers[mpath] = putJog 107 } 108 return xctn 109 } 110 111 func (r *XactPut) newPutJogger(mpath string) *putJogger { 112 j := &putJogger{ 113 parent: r, 114 mpath: mpath, 115 putCh: make(chan *request, requestBufSizeFS), 116 xactCh: make(chan *request, requestBufSizeEncode), 117 } 118 j.stopCh.Init() 119 return j 120 } 121 122 func (r *XactPut) dispatchRequest(req *request, lom *core.LOM) error { 123 debug.Assert(req.Action == ActDelete || req.Action == ActSplit, req.Action) 124 debug.Assert(req.ErrCh == nil, "ec-put does not support ErrCh") 125 if !r.ecRequestsEnabled() { 126 return ErrorECDisabled 127 } 128 switch req.Action { 129 case ActSplit: 130 r.stats.updateEncode(lom.SizeBytes()) 131 case ActDelete: 132 r.stats.updateDelete() 133 default: 134 return fmt.Errorf("invalid request's action %s for putxaction", req.Action) 135 } 136 137 jogger, ok := r.putJoggers[lom.Mountpath().Path] 138 if !ok { 139 debug.Assert(false, "invalid "+lom.Mountpath().String()) 140 } 141 if cmn.Rom.FastV(4, cos.SmoduleEC) { 142 nlog.Infof("ECPUT (bg queue = %d): dispatching object %s....", len(jogger.putCh), lom) 143 } 144 if req.rebuild { 145 jogger.xactCh <- req 146 } else { 147 r.stats.updateQueue(len(jogger.putCh)) 148 jogger.putCh <- req 149 } 150 return nil 151 } 152 153 func (r *XactPut) Run(*sync.WaitGroup) { 154 nlog.Infoln(r.Name()) 155 156 var wg sync.WaitGroup 157 for _, jog := range r.putJoggers { 158 wg.Add(1) 159 go jog.run(&wg) 160 } 161 162 ticker := time.NewTicker(r.config.Periodic.StatsTime.D()) 163 r.mainLoop(ticker) 164 ticker.Stop() 165 wg.Wait() 166 // not closing stream bundles as they are shared across EC xactions 167 r.Finish() 168 } 169 170 // all requests are equal, throttle TODO 171 func (r *XactPut) mainLoop(ticker *time.Ticker) { 172 for { 173 select { 174 case <-ticker.C: 175 if cmn.Rom.FastV(4, cos.SmoduleEC) { 176 if s := fmt.Sprintf("%v", r.Snap()); s != "" { 177 nlog.Infoln(s) 178 } 179 } 180 case <-r.IdleTimer(): 181 // It's OK not to notify ecmanager, it'll just have stopped xctn in a map. 182 r.stop() 183 return 184 case msg := <-r.controlCh: 185 if msg.Action == ActEnableRequests { 186 r.setEcRequestsEnabled() 187 break 188 } 189 debug.Assert(msg.Action == ActClearRequests, msg.Action) 190 191 r.setEcRequestsDisabled() 192 r.stop() 193 return 194 case <-r.ChanAbort(): 195 r.stop() 196 return 197 } 198 } 199 } 200 201 func (r *XactPut) Stop(err error) { r.Abort(err) } 202 203 func (r *XactPut) stop() { 204 r.DemandBase.Stop() 205 for _, jog := range r.putJoggers { 206 jog.stop() 207 } 208 209 // Don't close bundles, they are shared between bucket's EC actions 210 r.Finish() 211 } 212 213 // Encode schedules FQN for erasure coding process 214 func (r *XactPut) encode(req *request, lom *core.LOM) { 215 now := time.Now() 216 req.putTime, req.tm = now, now 217 if err := r.dispatchRequest(req, lom); err != nil { 218 nlog.Errorf("Failed to encode %s: %v", lom, err) 219 freeReq(req) 220 } 221 } 222 223 // Cleanup deletes all object slices or copies after the main object is removed 224 func (r *XactPut) cleanup(req *request, lom *core.LOM) { 225 now := time.Now() 226 req.putTime, req.tm = now, now 227 228 if err := r.dispatchRequest(req, lom); err != nil { 229 nlog.Errorf("Failed to cleanup %s: %v", lom, err) 230 freeReq(req) 231 } 232 } 233 234 func (r *XactPut) Snap() (snap *core.Snap) { 235 snap = r.baseSnap() 236 st := r.stats.stats() 237 snap.Ext = &ExtECPutStats{ 238 AvgEncodeTime: cos.Duration(st.EncodeTime), 239 EncodeSize: st.EncodeSize, 240 EncodeCount: st.PutReq, 241 EncodeErrCount: st.EncodeErr, 242 AvgDeleteTime: cos.Duration(st.DeleteTime), 243 DeleteErrCount: st.DeleteErr, 244 DeleteCount: st.DelReq, 245 AvgObjTime: cos.Duration(st.ObjTime), 246 AvgQueueLen: st.QueueLen, 247 IsIdle: r.Pending() == 0, 248 } 249 250 snap.Stats.Objs = st.PutReq + st.DelReq // TODO: support in and out 251 snap.Stats.Bytes = st.EncodeSize 252 return 253 }