github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ec/manager.go (about) 1 // Package ec provides erasure coding (EC) based data protection for AIStore. 2 /* 3 * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package ec 6 7 import ( 8 "errors" 9 "fmt" 10 "io" 11 ratomic "sync/atomic" 12 13 "github.com/NVIDIA/aistore/api/apc" 14 "github.com/NVIDIA/aistore/cmn" 15 "github.com/NVIDIA/aistore/cmn/atomic" 16 "github.com/NVIDIA/aistore/cmn/cos" 17 "github.com/NVIDIA/aistore/cmn/debug" 18 "github.com/NVIDIA/aistore/cmn/nlog" 19 "github.com/NVIDIA/aistore/core" 20 "github.com/NVIDIA/aistore/core/meta" 21 "github.com/NVIDIA/aistore/fs" 22 "github.com/NVIDIA/aistore/transport" 23 "github.com/NVIDIA/aistore/transport/bundle" 24 "github.com/NVIDIA/aistore/xact/xreg" 25 ) 26 27 type Manager struct { 28 bmd *meta.BMD 29 30 netReq string // network used to send object request 31 netResp string // network used to send/receive slices 32 33 // streams 34 reqBundle ratomic.Pointer[bundle.Streams] 35 respBundle ratomic.Pointer[bundle.Streams] 36 37 bundleEnabled atomic.Bool // to disable and enable on the fly 38 } 39 40 var ( 41 ECM *Manager 42 errSkipped = errors.New("skipped") // CT is skipped due to EC unsupported for the content type 43 ) 44 45 func initManager() (err error) { 46 ECM = &Manager{ 47 netReq: cmn.NetIntraControl, 48 netResp: cmn.NetIntraData, 49 bmd: core.T.Bowner().Get(), 50 } 51 if ECM.bmd.IsECUsed() { 52 err = ECM.initECBundles() 53 } 54 return err 55 } 56 57 func (mgr *Manager) req() *bundle.Streams { return mgr.reqBundle.Load() } 58 func (mgr *Manager) resp() *bundle.Streams { return mgr.respBundle.Load() } 59 60 func (mgr *Manager) initECBundles() error { 61 if !mgr.bundleEnabled.CAS(false, true) { 62 return nil 63 } 64 if err := transport.Handle(ReqStreamName, ECM.recvRequest); err != nil { 65 return fmt.Errorf("failed to register recvRequest: %v", err) 66 } 67 if err := transport.Handle(RespStreamName, ECM.recvResponse); err != nil { 68 return fmt.Errorf("failed to register respResponse: %v", err) 69 } 70 cbReq := func(hdr *transport.ObjHdr, _ io.ReadCloser, _ any, err error) { 71 if err != nil { 72 nlog.Errorf("failed to request %s: %v", hdr.Cname(), err) 73 } 74 } 75 var ( 76 client = transport.NewIntraDataClient() 77 config = cmn.GCO.Get() 78 compression = config.EC.Compression 79 extraReq = transport.Extra{Callback: cbReq, Compression: compression, Config: config} 80 ) 81 reqSbArgs := bundle.Args{ 82 Multiplier: config.EC.SbundleMult, 83 Extra: &extraReq, 84 Net: mgr.netReq, 85 Trname: ReqStreamName, 86 } 87 respSbArgs := bundle.Args{ 88 Multiplier: config.EC.SbundleMult, 89 Trname: RespStreamName, 90 Net: mgr.netResp, 91 Extra: &transport.Extra{Compression: compression, Config: config}, 92 } 93 94 mgr.reqBundle.Store(bundle.New(client, reqSbArgs)) 95 mgr.respBundle.Store(bundle.New(client, respSbArgs)) 96 97 return nil 98 } 99 100 func (mgr *Manager) closeECBundles() { 101 if !mgr.bundleEnabled.CAS(true, false) { 102 return 103 } 104 mgr.req().Close(false) 105 mgr.resp().Close(false) 106 transport.Unhandle(ReqStreamName) 107 transport.Unhandle(RespStreamName) 108 } 109 110 func (mgr *Manager) NewGetXact(bck *cmn.Bck) *XactGet { return newGetXact(bck, mgr) } 111 func (mgr *Manager) NewPutXact(bck *cmn.Bck) *XactPut { return newPutXact(bck, mgr) } 112 func (mgr *Manager) NewRespondXact(bck *cmn.Bck) *XactRespond { return newRespondXact(bck, mgr) } 113 114 func (*Manager) RestoreBckGetXact(bck *meta.Bck) *XactGet { 115 xctn, err := _renewXact(bck, apc.ActECGet) 116 debug.AssertNoErr(err) // TODO: handle, here and elsewhere 117 return xctn.(*XactGet) 118 } 119 120 func (*Manager) RestoreBckPutXact(bck *meta.Bck) *XactPut { 121 xctn, err := _renewXact(bck, apc.ActECPut) 122 debug.AssertNoErr(err) 123 return xctn.(*XactPut) 124 } 125 126 func (*Manager) RestoreBckRespXact(bck *meta.Bck) *XactRespond { 127 xctn, err := _renewXact(bck, apc.ActECRespond) 128 debug.AssertNoErr(err) 129 return xctn.(*XactRespond) 130 } 131 132 func _renewXact(bck *meta.Bck, kind string) (core.Xact, error) { 133 rns := xreg.RenewBucketXact(kind, bck, xreg.Args{}) 134 if rns.Err != nil { 135 return nil, rns.Err 136 } 137 return rns.Entry.Get(), nil 138 } 139 140 // A function to process command requests from other targets 141 func (mgr *Manager) recvRequest(hdr *transport.ObjHdr, objReader io.Reader, err error) error { 142 defer transport.FreeRecv(objReader) 143 if err != nil { 144 nlog.Errorf("request failed: %v", err) 145 return err 146 } 147 // check if the header contains a valid request 148 if len(hdr.Opaque) == 0 { 149 err := fmt.Errorf("invalid header: [%+v]", hdr) 150 nlog.Errorln(err) 151 return err 152 } 153 154 unpacker := cos.NewUnpacker(hdr.Opaque) 155 iReq := intraReq{} 156 if err := unpacker.ReadAny(&iReq); err != nil { 157 nlog.Errorf("failed to unmarshal request: %v", err) 158 return err 159 } 160 161 // command requests should not have a body, but if it has, 162 // the body must be drained to avoid errors 163 if hdr.ObjAttrs.Size != 0 { 164 if _, err := io.ReadAll(objReader); err != nil { 165 nlog.Errorf("failed to read request body: %v", err) 166 return err 167 } 168 } 169 bck := meta.CloneBck(&hdr.Bck) 170 if err = bck.Init(core.T.Bowner()); err != nil { 171 if _, ok := err.(*cmn.ErrRemoteBckNotFound); !ok { // is ais 172 nlog.Errorf("failed to init bucket %s: %v", bck, err) 173 return err 174 } 175 } 176 mgr.RestoreBckRespXact(bck).DispatchReq(iReq, hdr, bck) 177 return nil 178 } 179 180 // A function to process big chunks of data (replica/slice/meta) sent from other targets 181 func (mgr *Manager) recvResponse(hdr *transport.ObjHdr, objReader io.Reader, err error) error { 182 defer transport.DrainAndFreeReader(objReader) 183 if err != nil { 184 nlog.Errorln("failed to receive response:", err) 185 return err 186 } 187 // check if the request is valid 188 if len(hdr.Opaque) == 0 { 189 err := fmt.Errorf("invalid header: [%+v]", hdr) 190 nlog.Errorln(err) 191 return err 192 } 193 194 unpacker := cos.NewUnpacker(hdr.Opaque) 195 iReq := intraReq{} 196 if err := unpacker.ReadAny(&iReq); err != nil { 197 nlog.Errorln("failed to unpack request:", err) 198 return err 199 } 200 bck := meta.CloneBck(&hdr.Bck) 201 if err = bck.Init(core.T.Bowner()); err != nil { 202 if _, ok := err.(*cmn.ErrRemoteBckNotFound); !ok { // is ais 203 nlog.Errorln(err) 204 return err 205 } 206 } 207 switch hdr.Opcode { 208 case reqPut: 209 mgr.RestoreBckRespXact(bck).DispatchResp(iReq, hdr, objReader) 210 case respPut: 211 // Process the request even if the number of targets is insufficient 212 // (might've started when we had enough) 213 mgr.RestoreBckGetXact(bck).DispatchResp(iReq, hdr, bck, objReader) 214 default: 215 debug.Assertf(false, "unknown EC response action %d", hdr.Opcode) 216 } 217 return nil 218 } 219 220 // EncodeObject generates slices using Reed-Solom algorithm: 221 // - lom - object to encode 222 // - intra - if true, it is internal request and has low priority 223 // - cb - optional callback that is called after the object is encoded 224 func (mgr *Manager) EncodeObject(lom *core.LOM, cb core.OnFinishObj) error { 225 if !lom.ECEnabled() { 226 return ErrorECDisabled 227 } 228 cs := fs.Cap() 229 if err := cs.Err(); err != nil { 230 return err 231 } 232 spec, _ := fs.CSM.FileSpec(lom.FQN) 233 if spec != nil && !spec.PermToProcess() { 234 return errSkipped 235 } 236 237 req := allocateReq(ActSplit, lom.LIF()) 238 req.IsCopy = IsECCopy(lom.SizeBytes(), &lom.Bprops().EC) 239 if cb != nil { 240 req.rebuild = true 241 req.Callback = cb 242 } 243 244 mgr.RestoreBckPutXact(lom.Bck()).encode(req, lom) 245 246 return nil 247 } 248 249 func (mgr *Manager) CleanupObject(lom *core.LOM) { 250 if !lom.ECEnabled() { 251 return 252 } 253 debug.Assert(lom.FQN != "" && lom.Mountpath().Path != "") 254 req := allocateReq(ActDelete, lom.LIF()) 255 mgr.RestoreBckPutXact(lom.Bck()).cleanup(req, lom) 256 } 257 258 func (mgr *Manager) RestoreObject(lom *core.LOM) error { 259 if !lom.ECEnabled() { 260 return ErrorECDisabled 261 } 262 cs := fs.Cap() 263 if err := cs.Err(); err != nil { 264 return err 265 } 266 267 debug.Assert(lom.Mountpath() != nil && lom.Mountpath().Path != "") 268 req := allocateReq(ActRestore, lom.LIF()) 269 errCh := make(chan error) // unbuffered 270 req.ErrCh = errCh 271 mgr.RestoreBckGetXact(lom.Bck()).decode(req, lom) 272 273 // wait for EC completes restoring the object 274 return <-errCh 275 } 276 277 // disableBck starts to reject new EC requests, rejects pending ones 278 func (mgr *Manager) disableBck(bck *meta.Bck) { 279 mgr.RestoreBckGetXact(bck).ClearRequests() 280 mgr.RestoreBckPutXact(bck).ClearRequests() 281 } 282 283 // enableBck aborts xctn disable and starts to accept new EC requests 284 // enableBck uses the same channel as disableBck, so order of executing them is the same as 285 // order which they arrived to a target in 286 func (mgr *Manager) enableBck(bck *meta.Bck) { 287 mgr.RestoreBckGetXact(bck).EnableRequests() 288 mgr.RestoreBckPutXact(bck).EnableRequests() 289 } 290 291 func (mgr *Manager) BMDChanged() error { 292 newBMD := core.T.Bowner().Get() 293 oldBMD := mgr.bmd 294 if newBMD.Version <= mgr.bmd.Version { 295 return nil 296 } 297 mgr.bmd = newBMD 298 299 // globally 300 if newBMD.IsECUsed() && !oldBMD.IsECUsed() { 301 if err := mgr.initECBundles(); err != nil { 302 return err 303 } 304 } else if !newBMD.IsECUsed() && oldBMD.IsECUsed() { 305 mgr.closeECBundles() 306 return nil 307 } 308 309 // by bucket 310 newBMD.Range(nil, nil, func(nbck *meta.Bck) bool { 311 oprops, ok := oldBMD.Get(nbck) 312 if !ok { 313 if nbck.Props.EC.Enabled { 314 mgr.enableBck(nbck) 315 } 316 return false 317 } 318 if !oprops.EC.Enabled && nbck.Props.EC.Enabled { 319 mgr.enableBck(nbck) 320 } else if oprops.EC.Enabled && !nbck.Props.EC.Enabled { 321 mgr.disableBck(nbck) 322 } 323 324 return false 325 }) 326 return nil 327 }