github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/xact/xs/tcb.go (about) 1 // Package xs is a collection of eXtended actions (xactions), including multi-object 2 // operations, list-objects, (cluster) rebalance and (target) resilver, ETL, and more. 3 /* 4 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 5 */ 6 package xs 7 8 import ( 9 "fmt" 10 "io" 11 "sync" 12 "time" 13 14 "github.com/NVIDIA/aistore/api/apc" 15 "github.com/NVIDIA/aistore/cmn" 16 "github.com/NVIDIA/aistore/cmn/atomic" 17 "github.com/NVIDIA/aistore/cmn/cos" 18 "github.com/NVIDIA/aistore/cmn/debug" 19 "github.com/NVIDIA/aistore/cmn/mono" 20 "github.com/NVIDIA/aistore/cmn/nlog" 21 "github.com/NVIDIA/aistore/core" 22 "github.com/NVIDIA/aistore/core/meta" 23 "github.com/NVIDIA/aistore/fs" 24 "github.com/NVIDIA/aistore/fs/mpather" 25 "github.com/NVIDIA/aistore/memsys" 26 "github.com/NVIDIA/aistore/transport" 27 "github.com/NVIDIA/aistore/transport/bundle" 28 "github.com/NVIDIA/aistore/xact" 29 "github.com/NVIDIA/aistore/xact/xreg" 30 ) 31 32 type ( 33 tcbFactory struct { 34 xreg.RenewBase 35 xctn *XactTCB 36 kind string 37 phase string // (see "transition") 38 args *xreg.TCBArgs 39 owt cmn.OWT 40 } 41 XactTCB struct { 42 p *tcbFactory 43 dm *bundle.DataMover 44 rxlast atomic.Int64 // finishing 45 xact.BckJog 46 prune prune 47 nam, str string 48 wg sync.WaitGroup // starting up 49 refc atomic.Int32 // finishing 50 } 51 ) 52 53 const OpcTxnDone = 27182 54 55 const etlBucketParallelCnt = 2 56 57 // interface guard 58 var ( 59 _ core.Xact = (*XactTCB)(nil) 60 _ xreg.Renewable = (*tcbFactory)(nil) 61 ) 62 63 //////////////// 64 // tcbFactory // 65 //////////////// 66 67 func (p *tcbFactory) New(args xreg.Args, bck *meta.Bck) xreg.Renewable { 68 custom := args.Custom.(*xreg.TCBArgs) 69 return &tcbFactory{RenewBase: xreg.RenewBase{Args: args, Bck: bck}, kind: p.kind, phase: custom.Phase, args: custom} 70 } 71 72 func (p *tcbFactory) Start() error { 73 var ( 74 config = cmn.GCO.Get() 75 slab, err = core.T.PageMM().GetSlab(memsys.MaxPageSlabSize) // TODO: estimate 76 ) 77 debug.AssertNoErr(err) 78 79 p.owt = cmn.OwtCopy 80 if p.kind == apc.ActETLBck { 81 p.owt = cmn.OwtTransform 82 } 83 84 smap := core.T.Sowner().Get() 85 p.xctn = newTCB(p, slab, config, smap) 86 87 // refcount OpcTxnDone; this target must ve active (ref: ignoreMaintenance) 88 if err := core.InMaintOrDecomm(smap, core.T.Snode(), p.xctn); err != nil { 89 return err 90 } 91 nat := smap.CountActiveTs() 92 p.xctn.refc.Store(int32(nat - 1)) 93 p.xctn.wg.Add(1) 94 95 var sizePDU int32 96 if p.kind == apc.ActETLBck { 97 sizePDU = memsys.DefaultBufSize 98 } 99 if nat <= 1 { 100 return nil 101 } 102 return p.newDM(config, p.UUID(), sizePDU) 103 } 104 105 func (p *tcbFactory) newDM(config *cmn.Config, uuid string, sizePDU int32) error { 106 const trname = "tcb" 107 dmExtra := bundle.Extra{ 108 RecvAck: nil, // no ACKs 109 Config: config, 110 Compression: config.TCB.Compression, 111 Multiplier: config.TCB.SbundleMult, 112 SizePDU: sizePDU, 113 } 114 // in re cmn.OwtPut: see comment inside _recv() 115 dm, err := bundle.NewDataMover(trname+"-"+uuid, p.xctn.recv, p.owt, dmExtra) 116 if err != nil { 117 return err 118 } 119 if err := dm.RegRecv(); err != nil { 120 return err 121 } 122 dm.SetXact(p.xctn) 123 p.xctn.dm = dm 124 return nil 125 } 126 127 func (p *tcbFactory) Kind() string { return p.kind } 128 func (p *tcbFactory) Get() core.Xact { return p.xctn } 129 130 func (p *tcbFactory) WhenPrevIsRunning(prevEntry xreg.Renewable) (wpr xreg.WPR, err error) { 131 prev := prevEntry.(*tcbFactory) 132 if p.UUID() != prev.UUID() { 133 err = cmn.NewErrXactUsePrev(prevEntry.Get().String()) 134 return 135 } 136 bckEq := prev.args.BckFrom.Equal(p.args.BckFrom, true /*same BID*/, true /*same backend*/) 137 debug.Assert(bckEq) 138 debug.Assert(prev.phase == apc.ActBegin && p.phase == apc.ActCommit) 139 prev.args.Phase = apc.ActCommit // transition 140 wpr = xreg.WprUse 141 return 142 } 143 144 ///////////// 145 // XactTCB // 146 ///////////// 147 148 // copies one bucket _into_ another with or without transformation. 149 // args.DP.Reader() is the reader to receive transformed bytes; when nil we do a plain bucket copy. 150 151 // limited pre-run abort 152 func (r *XactTCB) TxnAbort(err error) { 153 err = cmn.NewErrAborted(r.Name(), "tcb: txn-abort", err) 154 r.dm.Close(err) 155 r.dm.UnregRecv() 156 r.AddErr(err) 157 r.Base.Finish() 158 } 159 160 func newTCB(p *tcbFactory, slab *memsys.Slab, config *cmn.Config, smap *meta.Smap) (r *XactTCB) { 161 r = &XactTCB{p: p} 162 163 s1, s2 := r._str(), r.p.args.BckFrom.String() 164 r.nam = r.Base.Name() + " <= " + s2 + s1 165 r.str = r.Base.String() + " <= " + s2 + s1 166 167 var parallel int 168 if p.kind == apc.ActETLBck { 169 parallel = etlBucketParallelCnt // TODO: optimize with respect to disk bw and transforming computation 170 } 171 mpopts := &mpather.JgroupOpts{ 172 CTs: []string{fs.ObjectType}, 173 VisitObj: r.do, 174 Prefix: p.args.Msg.Prefix, 175 Slab: slab, 176 Parallel: parallel, 177 DoLoad: mpather.Load, 178 Throttle: true, // always trottling 179 } 180 mpopts.Bck.Copy(p.args.BckFrom.Bucket()) 181 r.BckJog.Init(p.UUID(), p.kind, p.args.BckTo, mpopts, config) 182 183 if p.args.Msg.Sync { 184 debug.Assert(p.args.Msg.Prepend == "", p.args.Msg.Prepend) // validated (cli, P) 185 { 186 r.prune.parent = r 187 r.prune.smap = smap 188 r.prune.bckFrom = p.args.BckFrom 189 r.prune.bckTo = p.args.BckTo 190 r.prune.prefix = p.args.Msg.Prefix 191 } 192 r.prune.init(config) 193 } 194 return 195 } 196 197 func (r *XactTCB) WaitRunning() { r.wg.Wait() } 198 199 func (r *XactTCB) Run(wg *sync.WaitGroup) { 200 if r.dm != nil { 201 r.dm.SetXact(r) 202 r.dm.Open() 203 } 204 wg.Done() 205 206 r.wg.Done() 207 208 r.BckJog.Run() 209 if r.p.args.Msg.Sync { 210 r.prune.run() // the 2nd jgroup 211 } 212 nlog.Infoln(r.Name()) 213 214 err := r.BckJog.Wait() 215 216 if r.dm != nil { 217 o := transport.AllocSend() 218 o.Hdr.Opcode = OpcTxnDone 219 r.dm.Bcast(o, nil) 220 221 q := r.Quiesce(cmn.Rom.CplaneOperation(), r.qcb) 222 if q == core.QuiTimeout { 223 r.AddErr(fmt.Errorf("%s: %v", r, cmn.ErrQuiesceTimeout)) 224 } 225 226 // close 227 r.dm.Close(err) 228 r.dm.UnregRecv() 229 } 230 if r.p.args.Msg.Sync { 231 r.prune.wait() 232 } 233 r.Finish() 234 } 235 236 func (r *XactTCB) qcb(tot time.Duration) core.QuiRes { 237 // TODO -- FIXME ======================= 238 if cnt := r.ErrCnt(); cnt > 0 { 239 // to break quiescence - the waiter will look at r.Err() first anyway 240 return core.QuiTimeout 241 } 242 243 since := mono.Since(r.rxlast.Load()) 244 if r.refc.Load() > 0 { 245 if since > cmn.Rom.MaxKeepalive() { 246 // idle on the Rx side despite having some (refc > 0) senders 247 if tot > r.BckJog.Config.Timeout.SendFile.D() { 248 return core.QuiTimeout 249 } 250 } 251 return core.QuiActive 252 } 253 if since > cmn.Rom.CplaneOperation() { 254 return core.QuiDone 255 } 256 return core.QuiInactiveCB 257 } 258 259 func (r *XactTCB) do(lom *core.LOM, buf []byte) (err error) { 260 var ( 261 args = r.p.args // TCBArgs 262 toName = args.Msg.ToName(lom.ObjName) 263 ) 264 if cmn.Rom.FastV(5, cos.SmoduleXs) { 265 nlog.Infoln(r.Base.Name()+":", lom.Cname(), "=>", args.BckTo.Cname(toName)) 266 } 267 coiParams := core.AllocCOI() 268 { 269 coiParams.DP = args.DP 270 coiParams.Xact = r 271 coiParams.Config = r.Config 272 coiParams.BckTo = args.BckTo 273 coiParams.ObjnameTo = toName 274 coiParams.Buf = buf 275 coiParams.OWT = r.p.owt 276 coiParams.DryRun = args.Msg.DryRun 277 coiParams.LatestVer = args.Msg.LatestVer 278 coiParams.Sync = args.Msg.Sync 279 } 280 _, err = core.T.CopyObject(lom, r.dm, coiParams) 281 core.FreeCOI(coiParams) 282 switch { 283 case err == nil: 284 if args.Msg.Sync { 285 r.prune.filter.Insert(cos.UnsafeB(lom.Uname())) 286 } 287 case cos.IsNotExist(err, 0): 288 // do nothing 289 case cos.IsErrOOS(err): 290 r.Abort(err) 291 default: 292 r.AddErr(err, 5, cos.SmoduleXs) 293 } 294 return 295 } 296 297 // NOTE: strict(est) error handling: abort on any of the errors below 298 func (r *XactTCB) recv(hdr *transport.ObjHdr, objReader io.Reader, err error) error { 299 if err != nil && !cos.IsEOF(err) { 300 nlog.Errorln(err) 301 return err 302 } 303 // ref-count done-senders 304 if hdr.Opcode == OpcTxnDone { 305 refc := r.refc.Dec() 306 debug.Assert(refc >= 0) 307 return nil 308 } 309 310 debug.Assert(hdr.Opcode == 0) 311 lom := core.AllocLOM(hdr.ObjName) 312 err = r._recv(hdr, objReader, lom) 313 core.FreeLOM(lom) 314 transport.DrainAndFreeReader(objReader) 315 return err 316 } 317 318 func (r *XactTCB) _recv(hdr *transport.ObjHdr, objReader io.Reader, lom *core.LOM) error { 319 if err := lom.InitBck(&hdr.Bck); err != nil { 320 r.AddErr(err, 0) 321 return err 322 } 323 lom.CopyAttrs(&hdr.ObjAttrs, true /*skip cksum*/) 324 params := core.AllocPutParams() 325 { 326 params.WorkTag = fs.WorkfilePut 327 params.Reader = io.NopCloser(objReader) 328 params.Cksum = hdr.ObjAttrs.Cksum 329 params.Xact = r 330 params.Size = hdr.ObjAttrs.Size 331 params.OWT = r.p.owt 332 } 333 if lom.AtimeUnix() == 0 { 334 // TODO: sender must be setting it, remove this `if` when fixed 335 lom.SetAtimeUnix(time.Now().UnixNano()) 336 } 337 params.Atime = lom.Atime() 338 339 erp := core.T.PutObject(lom, params) 340 core.FreePutParams(params) 341 if erp != nil { 342 r.AddErr(erp, 0) 343 return erp // NOTE: non-nil signals transport to terminate 344 } 345 r.rxlast.Store(mono.NanoTime()) 346 return nil 347 } 348 349 func (r *XactTCB) Args() *xreg.TCBArgs { return r.p.args } 350 351 func (r *XactTCB) _str() (s string) { 352 msg := &r.p.args.Msg.CopyBckMsg 353 if msg.Prefix != "" { 354 s = ", prefix " + r.p.args.Msg.Prefix 355 } 356 if msg.Prepend != "" { 357 s = ", prepend " + r.p.args.Msg.Prepend 358 } 359 if msg.LatestVer { 360 s = ", latest-ver" 361 } 362 if msg.Sync { 363 s = ", synchronize" 364 } 365 return s 366 } 367 368 func (r *XactTCB) String() string { return r.str } 369 func (r *XactTCB) Name() string { return r.nam } 370 371 func (r *XactTCB) FromTo() (*meta.Bck, *meta.Bck) { 372 return r.p.args.BckFrom, r.p.args.BckTo 373 } 374 375 func (r *XactTCB) Snap() (snap *core.Snap) { 376 snap = &core.Snap{} 377 r.ToSnap(snap) 378 379 snap.IdleX = r.IsIdle() 380 f, t := r.FromTo() 381 snap.SrcBck, snap.DstBck = f.Clone(), t.Clone() 382 return 383 }