github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/fs/mpather/worker.go (about)

     1  // Package mpather provides per-mountpath concepts.
     2  /*
     3   * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package mpather
     6  
     7  import (
     8  	"fmt"
     9  	"runtime"
    10  
    11  	"github.com/NVIDIA/aistore/cmn"
    12  	"github.com/NVIDIA/aistore/cmn/cos"
    13  	"github.com/NVIDIA/aistore/cmn/debug"
    14  	"github.com/NVIDIA/aistore/core"
    15  	"github.com/NVIDIA/aistore/fs"
    16  	"github.com/NVIDIA/aistore/memsys"
    17  	"golang.org/x/sync/errgroup"
    18  )
    19  
type (
	// WorkerGroupOpts configures a WorkerGroup; one options instance is shared
	// by all workers of the group.
	//   - Callback:  invoked by a worker for every received object that loads
	//     successfully; `buf` is the worker's slab buffer (nil when Slab is nil).
	//   - Slab:      optional; when non-nil each worker allocates a single buffer
	//     from it at start and frees that buffer when the worker returns.
	//   - QueueSize: capacity of each worker's (buffered) work channel;
	//     asserted to be positive in debug builds.
	WorkerGroupOpts struct {
		Callback  func(lom *core.LOM, buf []byte)
		Slab      *memsys.Slab
		QueueSize int
	}

	// WorkerGroup starts one worker per mountpath; each worker receives (*core.LOM) tasks
	// and executes the specified callback.
	//   - wg:      runs the workers' goroutines and is waited upon in Stop.
	//   - workers: per-mountpath workers, keyed by the mountpath's Path.
	WorkerGroup struct {
		wg      *errgroup.Group
		workers map[string]*worker
	}
	// worker is the per-mountpath task executor.
	//   - opts:   group-wide options (callback, slab, queue size).
	//   - mi:     the mountpath this worker is associated with.
	//   - workCh: buffered queue of LIFs posted via PostLIF.
	//   - stopCh: closed by abort() to make the work loop return.
	worker struct {
		opts   *WorkerGroupOpts
		mi     *fs.Mountpath
		workCh chan core.LIF
		stopCh cos.StopCh
	}
)
    40  
    41  func NewWorkerGroup(opts *WorkerGroupOpts) *WorkerGroup {
    42  	var (
    43  		mpaths  = fs.GetAvail()
    44  		workers = make(map[string]*worker, len(mpaths))
    45  	)
    46  	debug.Assert(opts.QueueSize > 0) // expect buffered channels
    47  	for _, mi := range mpaths {
    48  		workers[mi.Path] = newWorker(opts, mi)
    49  	}
    50  	return &WorkerGroup{
    51  		wg:      &errgroup.Group{},
    52  		workers: workers,
    53  	}
    54  }
    55  
    56  func (wg *WorkerGroup) Run() {
    57  	for _, worker := range wg.workers {
    58  		wg.wg.Go(worker.work)
    59  	}
    60  }
    61  
    62  func (wg *WorkerGroup) PostLIF(lom *core.LOM) (chanFull bool, err error) {
    63  	mi := lom.Mountpath()
    64  	worker, ok := wg.workers[mi.Path]
    65  	if !ok {
    66  		return false, fmt.Errorf("post-lif: %s not found", mi)
    67  	}
    68  	worker.workCh <- lom.LIF()
    69  	if l, c := len(worker.workCh), cap(worker.workCh); l > c/2 {
    70  		runtime.Gosched() // poor man's throttle
    71  		chanFull = l == c
    72  	}
    73  	return
    74  }
    75  
    76  // Stop aborts all the workers. It should be called after we are sure no more
    77  // new tasks will be dispatched.
    78  func (wg *WorkerGroup) Stop() (n int) {
    79  	for _, worker := range wg.workers {
    80  		n += worker.abort()
    81  	}
    82  	_ = wg.wg.Wait()
    83  	return
    84  }
    85  
    86  func newWorker(opts *WorkerGroupOpts, mi *fs.Mountpath) (w *worker) {
    87  	w = &worker{
    88  		opts:   opts,
    89  		mi:     mi,
    90  		workCh: make(chan core.LIF, opts.QueueSize),
    91  	}
    92  	w.stopCh.Init()
    93  	return
    94  }
    95  
    96  func (w *worker) work() error {
    97  	var buf []byte
    98  	if w.opts.Slab != nil {
    99  		buf = w.opts.Slab.Alloc()
   100  		defer w.opts.Slab.Free(buf)
   101  	}
   102  	for {
   103  		select {
   104  		case lif := <-w.workCh:
   105  			lom, err := lif.LOM()
   106  			if err != nil {
   107  				break
   108  			}
   109  			if err = lom.Load(false /*cache it*/, false); err == nil {
   110  				w.opts.Callback(lom, buf)
   111  			} else {
   112  				core.FreeLOM(lom)
   113  			}
   114  		case <-w.stopCh.Listen(): // ABORT
   115  			close(w.workCh)
   116  
   117  			// `workCh` must be empty (if it is not, workers were not aborted correctly!)
   118  			_, ok := <-w.workCh
   119  			debug.Assert(!ok)
   120  
   121  			return cmn.NewErrAborted(w.String(), "mpath-work", nil)
   122  		}
   123  	}
   124  }
   125  
   126  func (w *worker) abort() int {
   127  	n := drainWorkCh(w.workCh)
   128  	w.stopCh.Close()
   129  	return n
   130  }
   131  
   132  func (w *worker) String() string { return fmt.Sprintf("worker %q", w.mi.Path) }
   133  
   134  func drainWorkCh(workCh chan core.LIF) (n int) {
   135  	for {
   136  		select {
   137  		case <-workCh:
   138  			n++
   139  		default:
   140  			return
   141  		}
   142  	}
   143  }