github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/xact/xs/nsumm.go (about) 1 // Package xs is a collection of eXtended actions (xactions), including multi-object 2 // operations, list-objects, (cluster) rebalance and (target) resilver, ETL, and more. 3 /* 4 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 5 */ 6 package xs 7 8 import ( 9 "fmt" 10 "math" 11 "sync" 12 ratomic "sync/atomic" 13 14 "github.com/NVIDIA/aistore/api/apc" 15 "github.com/NVIDIA/aistore/cmn" 16 "github.com/NVIDIA/aistore/cmn/cos" 17 "github.com/NVIDIA/aistore/cmn/debug" 18 "github.com/NVIDIA/aistore/cmn/nlog" 19 "github.com/NVIDIA/aistore/core" 20 "github.com/NVIDIA/aistore/core/meta" 21 "github.com/NVIDIA/aistore/fs" 22 "github.com/NVIDIA/aistore/fs/mpather" 23 "github.com/NVIDIA/aistore/sys" 24 "github.com/NVIDIA/aistore/xact" 25 "github.com/NVIDIA/aistore/xact/xreg" 26 ) 27 28 type ( 29 nsummFactory struct { 30 xreg.RenewBase 31 xctn *XactNsumm 32 msg *apc.BsummCtrlMsg 33 } 34 XactNsumm struct { 35 p *nsummFactory 36 oneRes cmn.BsummResult 37 mapRes map[uint64]*cmn.BsummResult 38 buckets []*meta.Bck 39 _nam, _str string 40 totalDiskSize uint64 41 xact.BckJog 42 single bool 43 listRemote bool 44 } 45 ) 46 47 // interface guard 48 var ( 49 _ xreg.Renewable = (*nsummFactory)(nil) 50 _ core.Xact = (*XactNsumm)(nil) 51 ) 52 53 ////////////////// 54 // nsummFactory // 55 ////////////////// 56 57 func (*nsummFactory) New(args xreg.Args, bck *meta.Bck) xreg.Renewable { 58 msg := args.Custom.(*apc.BsummCtrlMsg) 59 p := &nsummFactory{RenewBase: xreg.RenewBase{Args: args, Bck: bck}, msg: msg} 60 return p 61 } 62 63 func (p *nsummFactory) Start() (err error) { 64 p.xctn, err = newSumm(p) 65 if err == nil { 66 xact.GoRunW(p.xctn) 67 } 68 return 69 } 70 71 func (*nsummFactory) Kind() string { return apc.ActSummaryBck } 72 func (p *nsummFactory) Get() core.Xact { return p.xctn } 73 74 func (*nsummFactory) WhenPrevIsRunning(xreg.Renewable) (xreg.WPR, error) { 75 return xreg.WprKeepAndStartNew, nil 76 } 77 78 func newSumm(p *nsummFactory) (r *XactNsumm, err error) { 79 r = &XactNsumm{p: p} 80 81 r.totalDiskSize = fs.GetDiskSize() 82 83 listRemote := p.Bck.IsCloud() && !p.msg.ObjCached 84 if listRemote { 85 var ( 86 smap = core.T.Sowner().Get() 87 tsi *meta.Snode 88 ) 89 if tsi, err = smap.HrwTargetTask(p.UUID()); err != nil { 90 return r, err 91 } 92 r.listRemote = listRemote && tsi.ID() == core.T.SID() // this target 93 } 94 95 opts := &mpather.JgroupOpts{ 96 CTs: []string{fs.ObjectType}, 97 Prefix: p.msg.Prefix, 98 VisitObj: r.visitObj, 99 DoLoad: mpather.LoadUnsafe, 100 IncludeCopy: true, 101 } 102 if p.Bck.IsQuery() { 103 var single *meta.Bck 104 r.mapRes = make(map[uint64]*cmn.BsummResult, 8) 105 opts.Buckets, single = r.initResQbck() 106 107 nb := len(opts.Buckets) 108 switch nb { 109 case 0: 110 return r, fmt.Errorf("no buckets matching %q", p.Bck.Bucket()) 111 case 1: 112 // change of mind: single even though spec-ed as qbck 113 p.Bck = single 114 opts.Buckets = nil 115 r.buckets = nil 116 goto single 117 default: 118 // inc num joggers to boost 119 nmps := fs.NumAvail() 120 if nmps == 0 { 121 return r, cmn.ErrNoMountpaths 122 } 123 opts.PerBucket = nb*nmps <= sys.NumCPU() 124 goto ini 125 } 126 } 127 single: 128 r.initRes(&r.oneRes, p.Bck) 129 r.single = true 130 opts.Bck = p.Bck.Clone() 131 ini: 132 r.BckJog.Init(p.UUID(), p.Kind(), p.Bck, opts, cmn.GCO.Get()) 133 134 s := fmt.Sprintf("-msg-%+v", r.p.msg) 135 r._nam = r.Base.Name() + s 136 r._str = r.Base.String() + s 137 return r, nil 138 } 139 140 func (r *XactNsumm) Run(started *sync.WaitGroup) { 141 started.Done() 142 nlog.Infoln(r.Name(), r.p.Bck.Cname("")) 143 144 var wg cos.WG 145 if r.listRemote { 146 // _this_ target to list-and-summ remote pages, in parallel 147 if r.single { 148 wg = &sync.WaitGroup{} 149 wg.Add(1) 150 go func(wg cos.WG) { 151 r.runCloudBck(r.p.Bck, &r.oneRes) 152 wg.Done() 153 }(wg) 154 } else { 155 debug.Assert(len(r.buckets) > 1) 156 wg = cos.NewLimitedWaitGroup(sys.NumCPU(), len(r.buckets)) 157 for _, bck := range r.buckets { 158 res, ok := r.mapRes[bck.Props.BID] 159 debug.Assert(ok, r.Name(), bck.Cname("")) 160 wg.Add(1) 161 go func(bck *meta.Bck, wg cos.WG) { 162 r.runCloudBck(bck, res) 163 wg.Done() 164 }(bck, wg) 165 } 166 } 167 } 168 r.BckJog.Run() 169 170 err := r.BckJog.Wait() 171 if err != nil { 172 r.AddErr(err) 173 } 174 if wg != nil { 175 debug.Assert(r.listRemote) 176 wg.Wait() 177 } 178 179 r.Finish() 180 } 181 182 // to add all `res` pointers up front 183 func (r *XactNsumm) initResQbck() (cmn.Bcks, *meta.Bck) { 184 var ( 185 bmd = core.T.Bowner().Get() 186 qbck = (*cmn.QueryBcks)(r.p.Bck) 187 provider *string 188 ns *cmn.Ns 189 buckets = make(cmn.Bcks, 0, 8) // => jogger opts 190 single *meta.Bck 191 ) 192 if r.listRemote { 193 r.buckets = make([]*meta.Bck, 0, 8) 194 } 195 if qbck.Provider != "" { 196 provider = &qbck.Provider 197 } 198 if !qbck.Ns.IsGlobal() { 199 ns = &qbck.Ns 200 } 201 bmd.Range(provider, ns, func(bck *meta.Bck) bool { 202 res := &cmn.BsummResult{} 203 r.initRes(res, bck) 204 debug.Assert(bck.Props.BID != 0) 205 r.mapRes[bck.Props.BID] = res 206 buckets = append(buckets, res.Bck) 207 208 if r.listRemote { 209 r.buckets = append(r.buckets, bck) 210 } 211 single = bck 212 return false 213 }) 214 return buckets, single 215 } 216 217 func (r *XactNsumm) initRes(res *cmn.BsummResult, bck *meta.Bck) { 218 debug.Assert(r.totalDiskSize > 0) 219 res.Bck = bck.Clone() 220 res.TotalSize.Disks = r.totalDiskSize 221 res.ObjSize.Min = math.MaxInt64 222 res.TotalSize.OnDisk = fs.OnDiskSize(bck.Bucket(), r.p.msg.Prefix) 223 } 224 225 func (r *XactNsumm) String() string { return r._str } 226 func (r *XactNsumm) Name() string { return r._nam } 227 228 func (r *XactNsumm) Snap() (snap *core.Snap) { 229 snap = &core.Snap{} 230 r.ToSnap(snap) 231 snap.IdleX = r.IsIdle() 232 return 233 } 234 235 func (r *XactNsumm) Result() (cmn.AllBsummResults, error) { 236 if r.single { 237 var res cmn.BsummResult 238 r.cloneRes(&res, &r.oneRes) 239 return cmn.AllBsummResults{&res}, r.Err() 240 } 241 242 all := make(cmn.AllBsummResults, 0, len(r.mapRes)) 243 for _, src := range r.mapRes { 244 var dst cmn.BsummResult 245 r.cloneRes(&dst, src) 246 all = append(all, &dst) 247 } 248 return all, r.Err() 249 } 250 251 func (r *XactNsumm) cloneRes(dst, src *cmn.BsummResult) { 252 dst.Bck = src.Bck 253 dst.TotalSize.OnDisk = src.TotalSize.OnDisk 254 255 dst.ObjCount.Present = ratomic.LoadUint64(&src.ObjCount.Present) 256 dst.TotalSize.PresentObjs = ratomic.LoadUint64(&src.TotalSize.PresentObjs) 257 258 if r.listRemote { 259 dst.ObjCount.Remote = ratomic.LoadUint64(&src.ObjCount.Remote) 260 dst.TotalSize.RemoteObjs = ratomic.LoadUint64(&src.TotalSize.RemoteObjs) 261 } 262 263 dst.ObjSize.Min = ratomic.LoadInt64(&src.ObjSize.Min) 264 if dst.ObjSize.Min == math.MaxInt64 { 265 dst.ObjSize.Min = 0 266 } 267 dst.ObjSize.Max = ratomic.LoadInt64(&src.ObjSize.Max) 268 269 // compute the current (maybe, running-and-changing) average and used %% 270 if dst.ObjCount.Present > 0 { 271 dst.ObjSize.Avg = int64(cos.DivRoundU64(dst.TotalSize.PresentObjs, dst.ObjCount.Present)) 272 } 273 debug.Assert(r.totalDiskSize == src.TotalSize.Disks) 274 dst.TotalSize.Disks = r.totalDiskSize 275 dst.UsedPct = cos.DivRoundU64(dst.TotalSize.OnDisk*100, r.totalDiskSize) 276 } 277 278 func (r *XactNsumm) visitObj(lom *core.LOM, _ []byte) error { 279 var res *cmn.BsummResult 280 if r.single { 281 res = &r.oneRes 282 } else { 283 s, ok := r.mapRes[lom.Bprops().BID] 284 debug.Assert(ok, r.Name(), lom.Cname()) // j.opts.Buckets above 285 res = s 286 } 287 if !lom.IsCopy() { 288 ratomic.AddUint64(&res.ObjCount.Present, 1) 289 } 290 size := lom.SizeBytes() 291 if cmin := ratomic.LoadInt64(&res.ObjSize.Min); cmin > size { 292 ratomic.CompareAndSwapInt64(&res.ObjSize.Min, cmin, size) 293 } 294 if cmax := ratomic.LoadInt64(&res.ObjSize.Max); cmax < size { 295 ratomic.CompareAndSwapInt64(&res.ObjSize.Max, cmax, size) 296 } 297 ratomic.AddUint64(&res.TotalSize.PresentObjs, uint64(size)) 298 299 // generic stats (same as base.LomAdd()) 300 r.ObjsAdd(1, size) 301 return nil 302 } 303 304 // 305 // listRemote 306 // 307 308 func (r *XactNsumm) runCloudBck(bck *meta.Bck, res *cmn.BsummResult) { 309 lsmsg := &apc.LsoMsg{Props: apc.GetPropsSize, Prefix: r.p.msg.Prefix} 310 lsmsg.SetFlag(apc.LsNameSize) 311 for !r.IsAborted() { 312 npg := newNpgCtx(bck, lsmsg, noopCb, nil) // TODO -- FIXME: inventory offset 313 nentries := allocLsoEntries() 314 lst, err := npg.nextPageR(nentries, false /*load LOMs to include status and local MD*/) 315 if err != nil { 316 r.AddErr(err) 317 return 318 } 319 ratomic.AddUint64(&res.ObjCount.Remote, uint64(len(lst.Entries))) 320 for _, v := range lst.Entries { 321 ratomic.AddUint64(&res.TotalSize.RemoteObjs, uint64(v.Size)) 322 } 323 freeLsoEntries(lst.Entries) 324 if lsmsg.ContinuationToken = lst.ContinuationToken; lsmsg.ContinuationToken == "" { 325 return 326 } 327 } 328 }