github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/fs/mpather/jogger.go

// Package mpather provides per-mountpath concepts.
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */
package mpather

import (
	"context"
	"fmt"
	"path/filepath"
	"runtime"
	"strings"
	"time"

	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/atomic"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/nlog"
	"github.com/NVIDIA/aistore/core"
	"github.com/NVIDIA/aistore/core/meta"
	"github.com/NVIDIA/aistore/fs"
	"github.com/NVIDIA/aistore/memsys"
	"golang.org/x/sync/errgroup"
)

// walk all or selected buckets, one at a time

const (
	throttleNumObjects = 64 // unit of self-throttling
)

type LoadType int

const (
	noLoad LoadType = iota
	LoadUnsafe
	Load
)

const (
	ThrottleMinDur = time.Millisecond
	ThrottleAvgDur = time.Millisecond * 10
	ThrottleMaxDur = time.Millisecond * 100
)

type (
	JgroupOpts struct {
		onFinish              func()
		VisitObj              func(lom *core.LOM, buf []byte) error
		VisitCT               func(ct *core.CT, buf []byte) error
		Slab                  *memsys.Slab
		Bck                   cmn.Bck
		Buckets               cmn.Bcks
		Prefix                string
		CTs                   []string
		DoLoad                LoadType // if specified, lom.Load(lock type)
		Parallel              int      // num parallel calls
		IncludeCopy           bool     // visit copies (aka replicas)
		PerBucket             bool     // num joggers = (num mountpaths) x (num buckets)
		SkipGloballyMisplaced bool     // skip globally misplaced
		Throttle              bool     // true: pace itself depending on disk utilization
	}

	// Jgroup runs a jogger per mountpath; each jogger walks the entire bucket and
	// calls the provided callback on every encountered object. When a jogger runs
	// into an error it stops and informs the other joggers (so that they stop too).
	Jgroup struct {
		wg          *errgroup.Group
		joggers     map[string]*jogger
		finishedCh  cos.StopCh // when all joggers are done
		finishedCnt atomic.Uint32
	}

	// jogger runs on a single mountpath and executes fs.Walk, which in turn calls
	// the provided callback.
	jogger struct {
		ctx       context.Context
		syncGroup *joggerSyncGroup
		opts      *JgroupOpts
		mi        *fs.Mountpath
		bdir      string // mi.MakePath(bck)
		objPrefix string // fully-qualified prefix, as in: join(bdir, opts.Prefix)
		config    *cmn.Config
		stopCh    cos.StopCh
		bufs      [][]byte
		num       int64
	}

	joggerSyncGroup struct {
		sema chan int // positional number of the buffer for a goroutine to use
		group  *errgroup.Group
		cancel context.CancelFunc
	}
)

func NewJoggerGroup(opts *JgroupOpts, config *cmn.Config, mpath string) *Jgroup {
	var (
		joggers map[string]*jogger
		avail   = fs.GetAvail()
		wg, ctx = errgroup.WithContext(context.Background())
	)
	debug.Assert(!opts.IncludeCopy || (opts.IncludeCopy && opts.DoLoad > noLoad))

	jg := &Jgroup{wg: wg}
	opts.onFinish = jg.markFinished

	switch {
	case mpath != "":
		joggers = make(map[string]*jogger, 1)
		if mi, ok := avail[mpath]; ok {
			joggers[mi.Path] = newJogger(ctx, opts, mi, config)
		}
	case opts.PerBucket:
		debug.Assert(len(opts.Buckets) > 1)
		joggers = make(map[string]*jogger, len(avail)*len(opts.Buckets))
		for _, bck := range opts.Buckets {
			nopts := *opts
			nopts.Buckets = nil
			nopts.Bck = bck
			uname := bck.MakeUname("")
			for _, mi := range avail {
				joggers[mi.Path+"|"+uname] = newJogger(ctx, &nopts, mi, config)
			}
		}
	default:
		joggers = make(map[string]*jogger, len(avail))
		for _, mi := range avail {
			joggers[mi.Path] = newJogger(ctx, opts, mi, config)
		}
	}

	// this jogger group is a no-op (unlikely)
	if len(joggers) == 0 {
		_, disabled := fs.Get()
		nlog.Errorf("%v: avail=%v, disabled=%v, selected=%q", cmn.ErrNoMountpaths, avail, disabled, mpath)
	}

	jg.joggers = joggers
	jg.finishedCh.Init()

	return jg
}

func (jg *Jgroup) Num() int { return len(jg.joggers) }

func (jg *Jgroup) Run() {
	for _, jogger := range jg.joggers {
		jg.wg.Go(jogger.run)
	}
}

func (jg *Jgroup) Stop() error {
	for _, jogger := range jg.joggers {
		jogger.abort()
	}
	return jg.wg.Wait()
}

func (jg *Jgroup) ListenFinished() <-chan struct{} {
	return jg.finishedCh.Listen()
}

func (jg *Jgroup) markFinished() {
	if n := jg.finishedCnt.Inc(); n == uint32(len(jg.joggers)) {
		jg.finishedCh.Close()
	}
}

func newJogger(ctx context.Context, opts *JgroupOpts, mi *fs.Mountpath, config *cmn.Config) (j *jogger) {
	var syncGroup *joggerSyncGroup
	if opts.Parallel > 1 {
		var (
			group  *errgroup.Group
			cancel context.CancelFunc
		)
		ctx, cancel = context.WithCancel(ctx)
		group, ctx = errgroup.WithContext(ctx)
		syncGroup = &joggerSyncGroup{
			sema:   make(chan int, opts.Parallel),
			group:  group,
			cancel: cancel,
		}
		for i := range opts.Parallel {
			syncGroup.sema <- i
		}
	}
	j = &jogger{
		ctx:       ctx,
		opts:      opts,
		mi:        mi,
		config:    config,
		syncGroup: syncGroup,
	}
	if opts.Prefix != "" {
		j.bdir = mi.MakePathCT(&j.opts.Bck, fs.ObjectType) // this mountpath's bucket dir that contains objects
		j.objPrefix = filepath.Join(j.bdir, opts.Prefix)
	}
	j.stopCh.Init()
	return
}

func (j *jogger) run() (err error) {
	if j.opts.Slab != nil {
		if j.opts.Parallel <= 1 {
			j.bufs = [][]byte{j.opts.Slab.Alloc()}
		} else {
			j.bufs = make([][]byte, j.opts.Parallel)
			for i := range j.opts.Parallel {
				j.bufs[i] = j.opts.Slab.Alloc()
			}
		}
	}

	// 3 running options
	switch {
	case len(j.opts.Buckets) > 0:
		debug.Assert(j.opts.Bck.IsEmpty())
		err = j.runSelected()
	case j.opts.Bck.IsQuery():
		err = j.runQbck(cmn.QueryBcks(j.opts.Bck))
	default:
		_, err = j.runBck(&j.opts.Bck)
	}

	// cleanup
	if j.opts.Slab != nil {
		for _, buf := range j.bufs {
			j.opts.Slab.Free(buf)
		}
	}
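	// count this jogger as done: when the last one finishes, markFinished
	// closes the group-wide "finished" channel (see Jgroup.ListenFinished)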
	j.opts.onFinish()
	return
}

// run selected buckets, one at a time
func (j *jogger) runSelected() error {
	var errs cos.Errs
	for i := range j.opts.Buckets {
		aborted, err := j.runBck(&j.opts.Buckets[i])
		if err != nil {
			errs.Add(err)
		}
		if aborted {
			return &errs
		}
	}
	return nil
}

// run matching buckets, one at a time
func (j *jogger) runQbck(qbck cmn.QueryBcks) (err error) {
	var (
		bmd      = core.T.Bowner().Get()
		provider *string
		ns       *cmn.Ns
		errs     cos.Errs
	)
	if qbck.Provider != "" {
		provider = &qbck.Provider
	}
	if !qbck.Ns.IsGlobal() {
		ns = &qbck.Ns
	}
	bmd.Range(provider, ns, func(bck *meta.Bck) bool {
		aborted, errV := j.runBck(bck.Bucket())
		if errV != nil {
			errs.Add(errV)
			err = &errs
		}
		return aborted
	})
	return
}

// run a single bucket (see also: `PerBucket` above)
func (j *jogger) runBck(bck *cmn.Bck) (aborted bool, err error) {
	opts := &fs.WalkOpts{
		Mi:       j.mi,
		CTs:      j.opts.CTs,
		Callback: j.jog,
		Sorted:   false,
	}
	opts.Bck.Copy(bck)

	err = fs.Walk(opts)
	if j.syncGroup != nil {
		// If callbacks are executed in goroutines, fs.Walk can stop before the callbacks return.
		// We have to wait for them and check whether any of them returned an error.
		if err == nil {
			err = j.syncGroup.waitForAsyncTasks()
		} else {
			j.syncGroup.abortAsyncTasks()
		}
	}

	if err != nil {
		if cmn.IsErrAborted(err) {
			nlog.Infof("%s stopping traversal: %v", j, err)
			return true, nil
		}
		return false, err
	}
	return false, nil
}

func (j *jogger) jog(fqn string, de fs.DirEntry) error {
	if j.objPrefix != "" && strings.HasPrefix(fqn, j.bdir) {
		if de.IsDir() {
			if !cmn.DirHasOrIsPrefix(fqn, j.objPrefix) {
				return filepath.SkipDir
			}
		} else if !strings.HasPrefix(fqn, j.objPrefix) {
			return nil
		}
	}
	if de.IsDir() {
		return nil
	}

	if err := j.checkStopped(); err != nil {
		return err
	}

	var bufPosition int
	if j.syncGroup == nil {
		if err := j.visitFQN(fqn, j.getBuf(0)); err != nil {
			return err
		}
	} else {
		select {
		case bufPosition = <-j.syncGroup.sema:
			break
		case <-j.ctx.Done():
			return j.ctx.Err()
		}

		j.syncGroup.group.Go(func() error {
			defer func() {
				// NOTE: no need to select on j.ctx.Done() here, as the send to this channel is immediate
				j.syncGroup.sema <- bufPosition
			}()
			return j.visitFQN(fqn, j.getBuf(bufPosition))
		})
	}

	if j.opts.Throttle {
		j.num++
		if (j.num % throttleNumObjects) == 0 {
			j.throttle()
		} else {
			runtime.Gosched()
		}
	}
	return nil
}

func (j *jogger) visitFQN(fqn string, buf []byte) error {
	ct, err := core.NewCTFromFQN(fqn, core.T.Bowner())
	if err != nil {
		return err
	}

	if j.opts.SkipGloballyMisplaced {
		smap := core.T.Sowner().Get()
		tsi, err := smap.HrwHash2T(ct.Digest())
		if err != nil {
			return err
		}
		if tsi.ID() != core.T.SID() {
			return nil
		}
	}

	switch ct.ContentType() {
	case fs.ObjectType:
		lom := core.AllocLOM("")
		lom.InitCT(ct)
		err := j.visitObj(lom, buf)
		// NOTE: j.opts.VisitObj() callback implementations must either finish
		// synchronously or pass lom.LIF to another goroutine
		core.FreeLOM(lom)
		return err
	default:
		if err := j.visitCT(ct, buf); err != nil {
			return err
		}
	}
	return nil
}

func (j *jogger) visitObj(lom *core.LOM, buf []byte) (err error) {
	switch j.opts.DoLoad {
	case noLoad:
		goto visit
	case LoadUnsafe:
		err = lom.LoadUnsafe()
	case Load:
		err = lom.Load(false, false)
	default:
		debug.Assert(false, "invalid 'opts.DoLoad'", j.opts.DoLoad)
	}
	if err != nil {
		return
	}
	if !j.opts.IncludeCopy && lom.IsCopy() {
		return nil
	}
visit:
	return j.opts.VisitObj(lom, buf)
}

func (j *jogger) visitCT(ct *core.CT, buf []byte) error { return j.opts.VisitCT(ct, buf) }

func (j *jogger) getBuf(position int) []byte {
	if j.bufs == nil {
		return nil
	}
	return j.bufs[position]
}

func (j *jogger) checkStopped() error {
	select {
	case <-j.ctx.Done(): // some other worker has exited with an error and canceled the context
		return j.ctx.Err()
	case <-j.stopCh.Listen(): // this worker has been aborted
		return cmn.NewErrAborted(j.String(), "mpath-jog", nil)
	default:
		return nil
	}
}

func (sg *joggerSyncGroup) waitForAsyncTasks() error {
	return sg.group.Wait()
}

func (sg *joggerSyncGroup) abortAsyncTasks() error {
	sg.cancel()
	return sg.waitForAsyncTasks()
}

func (j *jogger) throttle() {
	curUtil := fs.GetMpathUtil(j.mi.Path)
	if curUtil >= j.config.Disk.DiskUtilHighWM {
		time.Sleep(ThrottleMinDur)
	}
}

func (j *jogger) abort()         { j.stopCh.Close() }
func (j *jogger) String() string { return fmt.Sprintf("jogger [%s/%s]", j.mi, j.opts.Bck) }
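// Usage sketch (illustration only, not part of the package): a minimal example of
// driving the Jgroup API from a caller. The bucket name and the VisitObj body below
// are hypothetical; per the NOTE in visitFQN, the callback must either finish
// synchronously or pass lom.LIF to another goroutine.
//
//	visit := func(lom *core.LOM, _ []byte) error {
//		nlog.Infoln(lom.Cname()) // e.g., log each visited object
//		return nil
//	}
//	opts := &mpather.JgroupOpts{
//		Bck:      cmn.Bck{Name: "mybucket", Provider: apc.AIS}, // hypothetical bucket
//		CTs:      []string{fs.ObjectType},
//		VisitObj: visit,
//		DoLoad:   mpather.LoadUnsafe,
//	}
//	jg := mpather.NewJoggerGroup(opts, cmn.GCO.Get(), "") // "" => one jogger per available mountpath
//	jg.Run()
//	<-jg.ListenFinished() // closes when all joggers are done
//	if err := jg.Stop(); err != nil {
//		nlog.Errorln(err)
//	}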