// Package xs is a collection of eXtended actions (xactions), including multi-object
// operations, list-objects, (cluster) rebalance and (target) resilver, ETL, and more.
/*
 * Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
 */
package xs

import (
	"fmt"
	"io"
	"runtime"
	"sync"
	"time"

	"github.com/NVIDIA/aistore/api/apc"
	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/atomic"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/nlog"
	"github.com/NVIDIA/aistore/core"
	"github.com/NVIDIA/aistore/core/meta"
	"github.com/NVIDIA/aistore/fs"
	"github.com/NVIDIA/aistore/memsys"
	"github.com/NVIDIA/aistore/transport"
	"github.com/NVIDIA/aistore/xact"
	"github.com/NVIDIA/aistore/xact/xreg"
)

// PrefixTcoID prefixes the target-generated UUID of this xaction (see Start).
const PrefixTcoID = "tco-"

type (
	// tcoFactory creates and renews x-tco ("transform/copy objects") xactions.
	tcoFactory struct {
		args *xreg.TCObjsArgs // custom (bckFrom => bckTo) args carried via xreg.Args.Custom
		streamingF
	}
	// XactTCObjs is the multi-object transform/copy xaction proper.
	XactTCObjs struct {
		// pending maps txn UUID => work item; entries are added in Begin
		// and looked up in Run and in _recv (opcodeDone handling).
		pending struct {
			m   map[string]*tcowi
			mtx sync.RWMutex
		}
		args     *xreg.TCObjsArgs
		workCh   chan *cmn.TCObjsMsg // incoming multi-object messages (see Do)
		chanFull atomic.Int64        // counts workCh-at-capacity occurrences (see Do)
		streamingX
		owt cmn.OWT // OwtCopy, or OwtTransform for apc.ActETLObjects (set in Start)
	}
	// tcowi is a single per-message (per-txn) work item.
	tcowi struct {
		r   *XactTCObjs
		msg *cmn.TCObjsMsg
		// finishing
		// refc is initialized to (number of active targets - 1) in Run and
		// decremented for each opcodeDone received from a peer (see _recv).
		refc atomic.Int32
	}
)

// interface guard
var (
	_ core.Xact      = (*XactTCObjs)(nil)
	_ xreg.Renewable = (*tcoFactory)(nil)
	_ lrwi           = (*tcowi)(nil)
)

////////////////
// tcoFactory //
////////////////

// New returns a renewable factory instance for the given args;
// the (bckFrom => bckTo) specifics arrive via args.Custom.
func (p *tcoFactory) New(args xreg.Args, bckFrom *meta.Bck) xreg.Renewable {
	np := &tcoFactory{streamingF: streamingF{RenewBase: xreg.RenewBase{Args: args, Bck: bckFrom}, kind: p.kind}}
	np.args = args.Custom.(*xreg.TCObjsArgs)
	return np
}

// Start generates the xaction's UUID, constructs the XactTCObjs, opens its
// data mover (DM) for peer-to-peer Rx, and spawns the Run goroutine.
func (p *tcoFactory) Start() error {
	//
	// target-local generation of a global UUID
	//
	uuid, err := p.genBEID(p.args.BckFrom, p.args.BckTo)
	if err != nil {
		return err
	}
	p.Args.UUID = PrefixTcoID + uuid

	// new x-tco
	workCh := make(chan *cmn.TCObjsMsg, maxNumInParallel)
	r := &XactTCObjs{streamingX: streamingX{p: &p.streamingF, config: cmn.GCO.Get()}, args: p.args, workCh: workCh}
	r.pending.m = make(map[string]*tcowi, maxNumInParallel)
	r.owt = cmn.OwtCopy
	if p.kind == apc.ActETLObjects {
		r.owt = cmn.OwtTransform
	}
	p.xctn = r
	r.DemandBase.Init(p.UUID(), p.Kind(), p.Bck, xact.IdleDefault)

	var sizePDU int32
	if p.kind == apc.ActETLObjects {
		// unlike apc.ActCopyObjects (where we know the size)
		// apc.ActETLObjects (transform) generates arbitrary sizes where we use PDU-based transport
		sizePDU = memsys.DefaultBufSize
	}

	if err := p.newDM(p.Args.UUID /*trname*/, r.recv, r.config, r.owt, sizePDU); err != nil {
		return err
	}

	// NOTE: r.p.dm and p.dm refer to the same streamingF-embedded data mover
	if r.p.dm != nil {
		p.dm.SetXact(r)
		p.dm.Open()
	}
	xact.GoRunW(r)
	return nil
}

////////////////
// XactTCObjs //
////////////////

// Name returns the xaction name annotated with the destination bucket.
func (r *XactTCObjs) Name() string {
	return fmt.Sprintf("%s => %s", r.streamingX.Name(), r.args.BckTo)
}

func (r *XactTCObjs) String() string {
	return r.streamingX.String() + " => " + r.args.BckTo.String()
}

// FromTo returns the (source, destination) bucket pair.
func (r *XactTCObjs) FromTo() (*meta.Bck, *meta.Bck) { return r.args.BckFrom, r.args.BckTo }

// Snap produces a point-in-time stats snapshot, including cloned src/dst buckets.
func (r *XactTCObjs) Snap() (snap *core.Snap) {
	snap = &core.Snap{}
	r.ToSnap(snap)

	snap.IdleX = r.IsIdle()
	f, t := r.FromTo()
	snap.SrcBck, snap.DstBck = f.Clone(), t.Clone()
	return
}

// Begin registers a pending work item for the message's txn UUID
// (to be picked up later by Run when the same message arrives via Do).
func (r *XactTCObjs) Begin(msg *cmn.TCObjsMsg) {
	wi := &tcowi{r: r, msg: msg}
	r.pending.mtx.Lock()
	r.pending.m[msg.TxnUUID] = wi
	r.wiCnt.Inc()
	r.pending.mtx.Unlock()
}

// Run is the xaction's main loop: for each queued message it iterates the
// list/range of objects (lriterator), optionally prunes the destination
// (when msg.Sync), and finally sends the opcodeDone termination to peers.
// Exits on idle timeout, abort, or error.
func (r *XactTCObjs) Run(wg *sync.WaitGroup) {
	var err error
	nlog.Infoln(r.Name())
	wg.Done()
	for {
		select {
		case msg := <-r.workCh:
			var (
				smap = core.T.Sowner().Get()
				lrit = &lriterator{}
			)
			debug.Assert(cos.IsValidUUID(msg.TxnUUID), msg.TxnUUID) // (ref050724: in re: ais/plstcx)
			r.pending.mtx.Lock()
			wi, ok := r.pending.m[msg.TxnUUID]
			r.pending.mtx.Unlock()
			if !ok {
				if r.ErrCnt() > 0 {
					goto fin
				}
				nlog.Errorf("%s: expecting errors in %s, missing txn %q", core.T.String(), r.String(), msg.TxnUUID) // (unlikely)
				continue
			}

			// this target must be active (ref: ignoreMaintenance)
			if err = core.InMaintOrDecomm(smap, core.T.Snode(), r); err != nil {
				nlog.Errorln(err)
				goto fin
			}
			nat := smap.CountActiveTs()
			wi.refc.Store(int32(nat - 1)) // expect opcodeDone from each of the other active targets

			// run
			// NOTE: intentionally shadows the WaitGroup parameter above
			var wg *sync.WaitGroup
			if err = lrit.init(r, &msg.ListRange, r.Bck()); err == nil {
				if msg.Sync && lrit.lrp != lrpList {
					// prune the destination concurrently with the main iteration
					wg = &sync.WaitGroup{}
					wg.Add(1)
					go func(pt *cos.ParsedTemplate) {
						r.prune(lrit, smap, pt)
						wg.Done()
					}(lrit.pt.Clone())
				}
				err = lrit.run(wi, smap)
			}
			if wg != nil {
				wg.Wait()
			}
			lrit.wait()

			if r.IsAborted() || err != nil {
				goto fin
			}
			r.sendTerm(wi.msg.TxnUUID, nil, nil)
			r.DecPending()
		case <-r.IdleTimer():
			goto fin
		case <-r.ChanAbort():
			goto fin
		}
	}
fin:
	r.fin(true /*unreg Rx*/)
	if r.ErrCnt() > 0 {
		// (see "expecting errors" and cleanup)
		r.pending.mtx.Lock()
		clear(r.pending.m)
		r.pending.mtx.Unlock()
	}
}

// more work
//
// Do enqueues the next multi-object message for Run to process,
// yielding the processor as a crude throttle when workCh fills up.
func (r *XactTCObjs) Do(msg *cmn.TCObjsMsg) {
	r.IncPending()
	r.workCh <- msg

	if l, c := len(r.workCh), cap(r.workCh); l > c/2 {
		runtime.Gosched() // poor man's throttle
		if l == c {
			cnt := r.chanFull.Inc()
			if (cnt >= 10 && cnt <= 20) || (cnt > 0 && cmn.Rom.FastV(5, cos.SmoduleXs)) {
				nlog.Errorln("work channel full", r.Name())
			}
		}
	}
}

//
// Rx
//

// NOTE: strict(est) error handling: abort on any of the errors below
func (r *XactTCObjs) recv(hdr *transport.ObjHdr, objReader io.Reader, err error) error {
	if err != nil && !cos.IsEOF(err) {
		goto ex
	}

	r.IncPending()
	err = r._recv(hdr, objReader)
	r.DecPending()
	transport.DrainAndFreeReader(objReader)
ex:
	if err != nil && cmn.Rom.FastV(4, cos.SmoduleXs) {
		nlog.Errorln(err)
	}
	return err
}

// _recv dispatches a received message: opcodeDone decrements the txn's
// remaining-peers refcount; otherwise (opcode 0) the payload is an object
// to be stored locally via _put.
func (r *XactTCObjs) _recv(hdr *transport.ObjHdr, objReader io.Reader) error {
	if hdr.Opcode == opcodeDone {
		r.pending.mtx.Lock()
		wi, ok := r.pending.m[cos.UnsafeS(hdr.Opaque)] // txnUUID
		if !ok {
			r.pending.mtx.Unlock()
			_, err := r.JoinErr()
			return err
		}
		refc := wi.refc.Dec()
		if refc == 0 {
			// last peer reported done
			r.wiCnt.Dec()
		}
		r.pending.mtx.Unlock()
		return nil
	}

	debug.Assert(hdr.Opcode == 0)
	lom := core.AllocLOM(hdr.ObjName)
	err := r._put(hdr, objReader, lom)
	core.FreeLOM(lom)
	return err
}

// _put stores the received object locally, carrying over the sender's
// object attributes and this xaction's OWT.
func (r *XactTCObjs) _put(hdr *transport.ObjHdr, objReader io.Reader, lom *core.LOM) (err error) {
	if err = lom.InitBck(&hdr.Bck); err != nil {
		return
	}
	lom.CopyAttrs(&hdr.ObjAttrs, true /*skip cksum*/)
	params := core.AllocPutParams()
	{
		params.WorkTag = fs.WorkfilePut
		params.Reader = io.NopCloser(objReader)
		params.Cksum = hdr.ObjAttrs.Cksum
		params.Xact = r
		params.Size = hdr.ObjAttrs.Size
		params.OWT = r.owt
	}
	if lom.AtimeUnix() == 0 {
		// TODO: sender must be setting it, remove this `if` when fixed
		lom.SetAtimeUnix(time.Now().UnixNano())
	}
	params.Atime = lom.Atime()
	err = core.T.PutObject(lom, params)
	core.FreePutParams(params)

	if err != nil {
		r.AddErr(err, 5, cos.SmoduleXs)
	} else if cmn.Rom.FastV(5, cos.SmoduleXs) {
		nlog.Infof("%s: tco-Rx %s, size=%d", r.Base.Name(), lom.Cname(), hdr.ObjAttrs.Size)
	}
	return
}

///////////
// tcowi //
///////////

// do copies (or transforms, via wi.r.args.DP) a single object into the
// destination bucket; invoked by lriterator for each visited object.
func (wi *tcowi) do(lom *core.LOM, lrit *lriterator) {
	var (
		objNameTo = wi.msg.ToName(lom.ObjName)
		buf, slab = core.T.PageMM().Alloc()
	)

	// under ETL, the returned sizes of transformed objects are unknown (`cos.ContentLengthUnknown`)
	// until after the transformation; here we are disregarding the size anyway as the stats
	// are done elsewhere

	coiParams := core.AllocCOI()
	{
		coiParams.DP = wi.r.args.DP
		coiParams.Xact = wi.r
		coiParams.Config = wi.r.config
		coiParams.BckTo = wi.r.args.BckTo
		coiParams.ObjnameTo = objNameTo
		coiParams.Buf = buf
		coiParams.OWT = wi.r.owt
		coiParams.DryRun = wi.msg.DryRun
		coiParams.LatestVer = wi.msg.LatestVer
		coiParams.Sync = wi.msg.Sync
	}
	_, err := core.T.CopyObject(lom, wi.r.p.dm, coiParams)
	core.FreeCOI(coiParams)
	slab.Free(buf)

	if err != nil {
		// a not-found source is tolerated for range/prefix iteration but not for explicit lists
		if !cos.IsNotExist(err, 0) || lrit.lrp == lrpList {
			wi.r.AddErr(err, 5, cos.SmoduleXs)
		}
	} else if cmn.Rom.FastV(5, cos.SmoduleXs) {
		nlog.Infoln(wi.r.Name()+":", lom.Cname(), "=>", wi.r.args.BckTo.Cname(objNameTo))
	}
}

//
// remove objects not present at the source (when synchronizing bckFrom => bckTo)
// TODO: probabilistic filtering
//

// syncwi adapts prune.do to the lrwi interface (see prune below).
type syncwi struct {
	rp *prune
}

// interface guard
var _ lrwi = (*syncwi)(nil)

// prune deletes destination objects that no longer exist at the source:
// prefix iteration (the tcb use case) runs the full prune machinery, while
// range iteration re-runs the same iterator against the destination bucket.
func (r *XactTCObjs) prune(lrit *lriterator, smap *meta.Smap, pt *cos.ParsedTemplate) {
	rp := prune{parent: r, smap: smap}
	rp.bckFrom, rp.bckTo = r.FromTo()

	// tcb use case
	if lrit.lrp == lrpPrefix {
		rp.prefix = lrit.prefix
		rp.init(r.config)
		rp.run()
		rp.wait()
		return
	}

	// same range iterator but different bucket
	var syncit lriterator
	debug.Assert(lrit.lrp == lrpRange)

	err := syncit.init(lrit.parent, lrit.msg, rp.bckTo)
	debug.AssertNoErr(err)
	syncit.pt = pt
	syncwi := &syncwi{&rp} // reusing only prune.do (and not init/run/wait)
	syncit.run(syncwi, smap)
	syncit.wait()
}

// do delegates to prune.do for each destination object visited.
func (syncwi *syncwi) do(lom *core.LOM, _ *lriterator) {
	syncwi.rp.do(lom, nil)
}