github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/mirror/put_copies.go (about) 1 // Package mirror provides local mirroring and replica management 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package mirror 6 7 import ( 8 "fmt" 9 "sync" 10 "time" 11 12 "github.com/NVIDIA/aistore/api/apc" 13 "github.com/NVIDIA/aistore/cmn" 14 "github.com/NVIDIA/aistore/cmn/atomic" 15 "github.com/NVIDIA/aistore/cmn/cos" 16 "github.com/NVIDIA/aistore/cmn/debug" 17 "github.com/NVIDIA/aistore/cmn/mono" 18 "github.com/NVIDIA/aistore/cmn/nlog" 19 "github.com/NVIDIA/aistore/core" 20 "github.com/NVIDIA/aistore/core/meta" 21 "github.com/NVIDIA/aistore/fs" 22 "github.com/NVIDIA/aistore/fs/mpather" 23 "github.com/NVIDIA/aistore/memsys" 24 "github.com/NVIDIA/aistore/xact" 25 "github.com/NVIDIA/aistore/xact/xreg" 26 ) 27 28 type ( 29 putFactory struct { 30 xreg.RenewBase 31 xctn *XactPut 32 lom *core.LOM 33 } 34 XactPut struct { 35 // implements core.Xact interface 36 xact.DemandBase 37 // runtime 38 workers *mpather.WorkerGroup 39 workCh chan core.LIF 40 chanFull atomic.Int64 41 // init 42 mirror cmn.MirrorConf 43 config *cmn.Config 44 } 45 ) 46 47 // interface guard 48 var ( 49 _ core.Xact = (*XactPut)(nil) 50 _ xreg.Renewable = (*putFactory)(nil) 51 ) 52 53 //////////////// 54 // putFactory // 55 //////////////// 56 57 func (*putFactory) New(args xreg.Args, bck *meta.Bck) xreg.Renewable { 58 p := &putFactory{RenewBase: xreg.RenewBase{Args: args, Bck: bck}, lom: args.Custom.(*core.LOM)} 59 return p 60 } 61 62 func (p *putFactory) Start() error { 63 lom := p.lom 64 slab, err := core.T.PageMM().GetSlab(memsys.MaxPageSlabSize) // TODO: estimate 65 debug.AssertNoErr(err) 66 67 bck, mirror := lom.Bck(), lom.MirrorConf() 68 if !mirror.Enabled { 69 return fmt.Errorf("%s: mirroring disabled, nothing to do", bck) 70 } 71 if err = fs.ValidateNCopies(core.T.String(), int(mirror.Copies)); err != nil { 72 nlog.Errorln(err) 73 return err 74 } 75 r := &XactPut{mirror: *mirror, workCh: make(chan core.LIF, mirror.Burst)} 76 77 // 78 // target-local generation of a global UUID 79 // 80 div := uint64(xact.IdleDefault) 81 beid, _, _ := xreg.GenBEID(div, p.Kind()+"|"+bck.MakeUname("")) 82 if beid == "" { 83 // is Ok (compare with x-archive, x-tco) 84 beid = cos.GenUUID() 85 } 86 r.DemandBase.Init(beid, p.Kind(), bck, xact.IdleDefault) 87 88 // joggers 89 r.workers = mpather.NewWorkerGroup(&mpather.WorkerGroupOpts{ 90 Callback: r.do, 91 Slab: slab, 92 QueueSize: mirror.Burst, 93 }) 94 p.xctn = r 95 96 // run 97 go r.Run(nil) 98 return nil 99 } 100 101 func (*putFactory) Kind() string { return apc.ActPutCopies } 102 func (p *putFactory) Get() core.Xact { return p.xctn } 103 104 func (p *putFactory) WhenPrevIsRunning(xprev xreg.Renewable) (xreg.WPR, error) { 105 debug.Assertf(false, "%s vs %s", p.Str(p.Kind()), xprev) // xreg.usePrev() must've returned true 106 return xreg.WprUse, nil 107 } 108 109 ///////////// 110 // XactPut // 111 ///////////// 112 113 // (one worker per mountpath) 114 func (r *XactPut) do(lom *core.LOM, buf []byte) { 115 copies := int(lom.Bprops().Mirror.Copies) 116 117 lom.Lock(true) 118 size, err := addCopies(lom, copies, buf) 119 lom.Unlock(true) 120 121 if err != nil { 122 r.AddErr(err, 5, cos.SmoduleMirror) 123 } else { 124 r.ObjsAdd(1, size) 125 } 126 r.DecPending() // (see IncPending below) 127 core.FreeLOM(lom) 128 } 129 130 // control logic: stop and idle timer 131 // (LOMs get dispatched directly to workers) 132 func (r *XactPut) Run(*sync.WaitGroup) { 133 var err error 134 nlog.Infoln(r.Name()) 135 r.config = cmn.GCO.Get() 136 r.workers.Run() 137 loop: 138 for { 139 select { 140 case <-r.IdleTimer(): 141 r.waitPending() 142 break loop 143 case <-r.ChanAbort(): 144 break loop 145 } 146 } 147 148 err = r.stop() 149 if err != nil { 150 r.AddErr(err) 151 } 152 r.Finish() 153 } 154 155 // main method 156 func (r *XactPut) Repl(lom *core.LOM) { 157 debug.Assert(!r.Finished(), r.String()) 158 159 // ref-count on-demand, decrement via worker.Callback = r.do 160 r.IncPending() 161 chanFull, err := r.workers.PostLIF(lom) 162 if err != nil { 163 r.DecPending() 164 r.Abort(fmt.Errorf("%s: %v", r, err)) 165 } 166 if chanFull { 167 r.chanFull.Inc() 168 } 169 } 170 171 func (r *XactPut) waitPending() { 172 const minsleep, longtime = 4 * time.Second, 30 * time.Second 173 var ( 174 started int64 175 cnt, iniCnt int 176 sleep = max(cmn.Rom.MaxKeepalive(), minsleep) 177 ) 178 if cnt = len(r.workCh); cnt == 0 { 179 return 180 } 181 started, iniCnt = mono.NanoTime(), cnt 182 // keep sleeping until the very end 183 for cnt > 0 { 184 r.IncPending() 185 time.Sleep(sleep) 186 r.DecPending() 187 cnt = len(r.workCh) 188 } 189 if d := mono.Since(started); d > longtime { 190 nlog.Infof("%s: took a while to finish %d pending copies: %v", r, iniCnt, d) 191 } 192 } 193 194 func (r *XactPut) stop() (err error) { 195 r.DemandBase.Stop() 196 n := r.workers.Stop() 197 if nn := drainWorkCh(r.workCh); nn > 0 { 198 n += nn 199 } 200 if n > 0 { 201 r.SubPending(n) 202 err = fmt.Errorf("%s: dropped %d object%s", r, n, cos.Plural(n)) 203 } 204 if cnt := r.chanFull.Load(); (cnt >= 10 && cnt <= 20) || (cnt > 0 && cmn.Rom.FastV(5, cos.SmoduleMirror)) { 205 nlog.Errorln("work channel full (all mp workers)", r.String(), cnt) 206 } 207 return 208 } 209 210 func (r *XactPut) Snap() (snap *core.Snap) { 211 snap = &core.Snap{} 212 r.ToSnap(snap) 213 214 snap.IdleX = r.IsIdle() 215 return 216 }