github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/x/sync/pooled_worker_pool.go

github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/x/sync/pooled_worker_pool.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package sync
    22  
    23  import (
    24  	"context"
    25  	"fmt"
    26  	"math"
    27  	"sync"
    28  	"sync/atomic"
    29  	"time"
    30  
    31  	"github.com/MichaelTJones/pcg"
    32  	"github.com/uber-go/tally"
    33  )
    34  
    35  const (
    36  	numGoroutinesGaugeSampleRate = 1000
    37  )
    38  
    39  type pooledWorkerPool struct {
    40  	sync.Mutex
    41  	numRoutinesAtomic        int64
    42  	numWorkingRoutinesAtomic int64
    43  	numRoutinesGauge         tally.Gauge
    44  	numWorkingRoutinesGauge  tally.Gauge
    45  	growOnDemand             bool
    46  	workChs                  []chan Work
    47  	numShards                int64
    48  	killWorkerProbability    float64
    49  	nowFn                    NowFn
    50  }
    51  
    52  // NewPooledWorkerPool creates a new worker pool.
    53  func NewPooledWorkerPool(size int, opts PooledWorkerPoolOptions) (PooledWorkerPool, error) {
    54  	if size <= 0 {
    55  		return nil, fmt.Errorf("pooled worker pool size too small: %d", size)
    56  	}
    57  
    58  	numShards := opts.NumShards()
    59  	if int64(size) < numShards {
    60  		numShards = int64(size)
    61  	}
    62  
    63  	workChs := make([]chan Work, numShards)
    64  	bufSize := int64(size) / numShards
    65  	if opts.GrowOnDemand() {
    66  		// Do not use buffered channels if the pool can grow on demand. This ensures a new worker is spawned if all
    67  		// workers are currently busy.
    68  		bufSize = 0
    69  	}
    70  	for i := range workChs {
    71  		workChs[i] = make(chan Work, bufSize)
    72  	}
    73  
    74  	return &pooledWorkerPool{
    75  		numRoutinesAtomic:        0,
    76  		numWorkingRoutinesAtomic: 0,
    77  		numRoutinesGauge:         opts.InstrumentOptions().MetricsScope().Gauge("num-routines"),
    78  		numWorkingRoutinesGauge:  opts.InstrumentOptions().MetricsScope().Gauge("num-working-routines"),
    79  		growOnDemand:             opts.GrowOnDemand(),
    80  		workChs:                  workChs,
    81  		numShards:                numShards,
    82  		killWorkerProbability:    opts.KillWorkerProbability(),
    83  		nowFn:                    opts.NowFn(),
    84  	}, nil
    85  }
    86  
    87  func (p *pooledWorkerPool) Init() {
    88  	rng := pcg.NewPCG64() // Just use default seed here
    89  	for _, workCh := range p.workChs {
    90  		for i := 0; i < cap(workCh); i++ {
    91  			p.spawnWorker(rng.Random(), nil, workCh, true)
    92  		}
    93  	}
    94  }
    95  
    96  func (p *pooledWorkerPool) Go(work Work) {
    97  	p.work(maybeContext{}, work, 0)
    98  }
    99  
   100  func (p *pooledWorkerPool) GoWithTimeout(work Work, timeout time.Duration) bool {
   101  	return p.work(maybeContext{}, work, timeout)
   102  }
   103  
   104  func (p *pooledWorkerPool) GoWithContext(ctx context.Context, work Work) bool {
   105  	return p.work(maybeContext{ctx: ctx}, work, 0)
   106  }
   107  
   108  func (p *pooledWorkerPool) FastContextCheck(batchSize int) PooledWorkerPool {
   109  	return &fastPooledWorkerPool{workerPool: p, batchSize: batchSize}
   110  }
   111  
   112  // maybeContext works around the linter about optionally
   113  // passing the context for scenarios where we don't want to use
   114  // context in the APIs.
   115  type maybeContext struct {
   116  	ctx context.Context
   117  }
   118  
   119  func (p *pooledWorkerPool) work(
   120  	ctx maybeContext,
   121  	work Work,
   122  	timeout time.Duration,
   123  ) bool {
   124  	var (
   125  		// Use time.Now() to avoid excessive synchronization
   126  		currTime  = p.nowFn().UnixNano()
   127  		workChIdx = currTime % p.numShards
   128  		workCh    = p.workChs[workChIdx]
   129  	)
   130  
   131  	if currTime%numGoroutinesGaugeSampleRate == 0 {
   132  		p.emitNumRoutines()
   133  		p.emitNumWorkingRoutines()
   134  	}
   135  
   136  	if !p.growOnDemand {
   137  		if ctx.ctx == nil && timeout <= 0 {
   138  			workCh <- work
   139  			return true
   140  		}
   141  
   142  		if ctx.ctx != nil {
   143  			// See if canceled first.
   144  			select {
   145  			case <-ctx.ctx.Done():
   146  				return false
   147  			default:
   148  			}
   149  
   150  			// Using context for cancellation not timer.
   151  			select {
   152  			case workCh <- work:
   153  				return true
   154  			case <-ctx.ctx.Done():
   155  				return false
   156  			}
   157  		}
   158  
   159  		// Attempt to try writing without allocating a ticker.
   160  		select {
   161  		case workCh <- work:
   162  			return true
   163  		default:
   164  		}
   165  
   166  		// Using timeout so allocate a ticker and attempt a write.
   167  		ticker := time.NewTicker(timeout)
   168  		defer ticker.Stop()
   169  
   170  		select {
   171  		case workCh <- work:
   172  			return true
   173  		case <-ticker.C:
   174  			return false
   175  		}
   176  	}
   177  
   178  	select {
   179  	case workCh <- work:
   180  	default:
   181  		// If the queue for the worker we were assigned to is full,
   182  		// allocate a new goroutine to do the work and then
   183  		// assign it to be a temporary additional worker for the queue.
   184  		// This allows the worker pool to accommodate "bursts" of
   185  		// traffic. Also, it reduces the need for operators to tune the size
   186  		// of the pool for a given workload. If the pool is initially
   187  		// sized too small, it will eventually grow to accommodate the
   188  		// workload, and if the workload decreases the killWorkerProbability
   189  		// will slowly shrink the pool back down to its original size because
   190  		// workers created in this manner will not spawn their replacement
   191  		// before killing themselves.
   192  		p.spawnWorker(uint64(currTime), work, workCh, false)
   193  	}
   194  	return true
   195  }
   196  
   197  func (p *pooledWorkerPool) spawnWorker(
   198  	seed uint64, initialWork Work, workCh chan Work, spawnReplacement bool) {
   199  	go func() {
   200  		p.incNumRoutines()
   201  		if initialWork != nil {
   202  			initialWork()
   203  		}
   204  
   205  		// RNG per worker to avoid synchronization.
   206  		var (
   207  			rng = pcg.NewPCG64().Seed(seed, seed*2, seed*3, seed*4)
   208  			// killWorkerProbability is a float but but the PCG RNG only
   209  			// generates uint64s so we need to identify the uint64 number
   210  			// that corresponds to the equivalent probability assuming we're
   211  			// generating random numbers in the entire uint64 range. For example,
   212  			// if the max uint64 was 1000 and we had a killWorkerProbability of 0.15
   213  			// then the killThreshold should be 0.15 * 1000 = 150 if we want a randomly
   214  			// chosen number between 0 and 1000 to have a 15% chance of being below
   215  			// the selected threshold.
   216  			killThreshold = uint64(p.killWorkerProbability * float64(math.MaxUint64))
   217  		)
   218  		for f := range workCh {
   219  			p.incNumWorkingRoutines()
   220  			f()
   221  			p.decNumWorkingRoutines()
   222  			if rng.Random() < killThreshold {
   223  				if spawnReplacement {
   224  					p.spawnWorker(rng.Random(), nil, workCh, true)
   225  				}
   226  				p.decNumRoutines()
   227  				return
   228  			}
   229  		}
   230  	}()
   231  }
   232  
   233  func (p *pooledWorkerPool) emitNumRoutines() {
   234  	numRoutines := float64(p.getNumRoutines())
   235  	p.numRoutinesGauge.Update(numRoutines)
   236  }
   237  
   238  func (p *pooledWorkerPool) incNumRoutines() {
   239  	atomic.AddInt64(&p.numRoutinesAtomic, 1)
   240  }
   241  
   242  func (p *pooledWorkerPool) decNumRoutines() {
   243  	atomic.AddInt64(&p.numRoutinesAtomic, -1)
   244  }
   245  
   246  func (p *pooledWorkerPool) getNumRoutines() int64 {
   247  	return atomic.LoadInt64(&p.numRoutinesAtomic)
   248  }
   249  
   250  func (p *pooledWorkerPool) emitNumWorkingRoutines() {
   251  	numRoutines := float64(p.getNumWorkingRoutines())
   252  	p.numWorkingRoutinesGauge.Update(numRoutines)
   253  }
   254  
   255  func (p *pooledWorkerPool) incNumWorkingRoutines() {
   256  	atomic.AddInt64(&p.numWorkingRoutinesAtomic, 1)
   257  }
   258  
   259  func (p *pooledWorkerPool) decNumWorkingRoutines() {
   260  	atomic.AddInt64(&p.numWorkingRoutinesAtomic, -1)
   261  }
   262  
   263  func (p *pooledWorkerPool) getNumWorkingRoutines() int64 {
   264  	return atomic.LoadInt64(&p.numWorkingRoutinesAtomic)
   265  }