github.com/m3db/m3@v1.5.0/src/x/sync/pooled_worker_pool.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package sync 22 23 import ( 24 "context" 25 "fmt" 26 "math" 27 "sync" 28 "sync/atomic" 29 "time" 30 31 "github.com/MichaelTJones/pcg" 32 "github.com/uber-go/tally" 33 ) 34 35 const ( 36 numGoroutinesGaugeSampleRate = 1000 37 ) 38 39 type pooledWorkerPool struct { 40 sync.Mutex 41 numRoutinesAtomic int64 42 numWorkingRoutinesAtomic int64 43 numRoutinesGauge tally.Gauge 44 numWorkingRoutinesGauge tally.Gauge 45 growOnDemand bool 46 workChs []chan Work 47 numShards int64 48 killWorkerProbability float64 49 nowFn NowFn 50 } 51 52 // NewPooledWorkerPool creates a new worker pool. 53 func NewPooledWorkerPool(size int, opts PooledWorkerPoolOptions) (PooledWorkerPool, error) { 54 if size <= 0 { 55 return nil, fmt.Errorf("pooled worker pool size too small: %d", size) 56 } 57 58 numShards := opts.NumShards() 59 if int64(size) < numShards { 60 numShards = int64(size) 61 } 62 63 workChs := make([]chan Work, numShards) 64 bufSize := int64(size) / numShards 65 if opts.GrowOnDemand() { 66 // Do not use buffered channels if the pool can grow on demand. This ensures a new worker is spawned if all 67 // workers are currently busy. 68 bufSize = 0 69 } 70 for i := range workChs { 71 workChs[i] = make(chan Work, bufSize) 72 } 73 74 return &pooledWorkerPool{ 75 numRoutinesAtomic: 0, 76 numWorkingRoutinesAtomic: 0, 77 numRoutinesGauge: opts.InstrumentOptions().MetricsScope().Gauge("num-routines"), 78 numWorkingRoutinesGauge: opts.InstrumentOptions().MetricsScope().Gauge("num-working-routines"), 79 growOnDemand: opts.GrowOnDemand(), 80 workChs: workChs, 81 numShards: numShards, 82 killWorkerProbability: opts.KillWorkerProbability(), 83 nowFn: opts.NowFn(), 84 }, nil 85 } 86 87 func (p *pooledWorkerPool) Init() { 88 rng := pcg.NewPCG64() // Just use default seed here 89 for _, workCh := range p.workChs { 90 for i := 0; i < cap(workCh); i++ { 91 p.spawnWorker(rng.Random(), nil, workCh, true) 92 } 93 } 94 } 95 96 func (p *pooledWorkerPool) Go(work Work) { 97 p.work(maybeContext{}, work, 0) 98 } 99 100 func (p *pooledWorkerPool) GoWithTimeout(work Work, timeout time.Duration) bool { 101 return p.work(maybeContext{}, work, timeout) 102 } 103 104 func (p *pooledWorkerPool) GoWithContext(ctx context.Context, work Work) bool { 105 return p.work(maybeContext{ctx: ctx}, work, 0) 106 } 107 108 func (p *pooledWorkerPool) FastContextCheck(batchSize int) PooledWorkerPool { 109 return &fastPooledWorkerPool{workerPool: p, batchSize: batchSize} 110 } 111 112 // maybeContext works around the linter about optionally 113 // passing the context for scenarios where we don't want to use 114 // context in the APIs. 115 type maybeContext struct { 116 ctx context.Context 117 } 118 119 func (p *pooledWorkerPool) work( 120 ctx maybeContext, 121 work Work, 122 timeout time.Duration, 123 ) bool { 124 var ( 125 // Use time.Now() to avoid excessive synchronization 126 currTime = p.nowFn().UnixNano() 127 workChIdx = currTime % p.numShards 128 workCh = p.workChs[workChIdx] 129 ) 130 131 if currTime%numGoroutinesGaugeSampleRate == 0 { 132 p.emitNumRoutines() 133 p.emitNumWorkingRoutines() 134 } 135 136 if !p.growOnDemand { 137 if ctx.ctx == nil && timeout <= 0 { 138 workCh <- work 139 return true 140 } 141 142 if ctx.ctx != nil { 143 // See if canceled first. 144 select { 145 case <-ctx.ctx.Done(): 146 return false 147 default: 148 } 149 150 // Using context for cancellation not timer. 151 select { 152 case workCh <- work: 153 return true 154 case <-ctx.ctx.Done(): 155 return false 156 } 157 } 158 159 // Attempt to try writing without allocating a ticker. 160 select { 161 case workCh <- work: 162 return true 163 default: 164 } 165 166 // Using timeout so allocate a ticker and attempt a write. 167 ticker := time.NewTicker(timeout) 168 defer ticker.Stop() 169 170 select { 171 case workCh <- work: 172 return true 173 case <-ticker.C: 174 return false 175 } 176 } 177 178 select { 179 case workCh <- work: 180 default: 181 // If the queue for the worker we were assigned to is full, 182 // allocate a new goroutine to do the work and then 183 // assign it to be a temporary additional worker for the queue. 184 // This allows the worker pool to accommodate "bursts" of 185 // traffic. Also, it reduces the need for operators to tune the size 186 // of the pool for a given workload. If the pool is initially 187 // sized too small, it will eventually grow to accommodate the 188 // workload, and if the workload decreases the killWorkerProbability 189 // will slowly shrink the pool back down to its original size because 190 // workers created in this manner will not spawn their replacement 191 // before killing themselves. 192 p.spawnWorker(uint64(currTime), work, workCh, false) 193 } 194 return true 195 } 196 197 func (p *pooledWorkerPool) spawnWorker( 198 seed uint64, initialWork Work, workCh chan Work, spawnReplacement bool) { 199 go func() { 200 p.incNumRoutines() 201 if initialWork != nil { 202 initialWork() 203 } 204 205 // RNG per worker to avoid synchronization. 206 var ( 207 rng = pcg.NewPCG64().Seed(seed, seed*2, seed*3, seed*4) 208 // killWorkerProbability is a float but but the PCG RNG only 209 // generates uint64s so we need to identify the uint64 number 210 // that corresponds to the equivalent probability assuming we're 211 // generating random numbers in the entire uint64 range. For example, 212 // if the max uint64 was 1000 and we had a killWorkerProbability of 0.15 213 // then the killThreshold should be 0.15 * 1000 = 150 if we want a randomly 214 // chosen number between 0 and 1000 to have a 15% chance of being below 215 // the selected threshold. 216 killThreshold = uint64(p.killWorkerProbability * float64(math.MaxUint64)) 217 ) 218 for f := range workCh { 219 p.incNumWorkingRoutines() 220 f() 221 p.decNumWorkingRoutines() 222 if rng.Random() < killThreshold { 223 if spawnReplacement { 224 p.spawnWorker(rng.Random(), nil, workCh, true) 225 } 226 p.decNumRoutines() 227 return 228 } 229 } 230 }() 231 } 232 233 func (p *pooledWorkerPool) emitNumRoutines() { 234 numRoutines := float64(p.getNumRoutines()) 235 p.numRoutinesGauge.Update(numRoutines) 236 } 237 238 func (p *pooledWorkerPool) incNumRoutines() { 239 atomic.AddInt64(&p.numRoutinesAtomic, 1) 240 } 241 242 func (p *pooledWorkerPool) decNumRoutines() { 243 atomic.AddInt64(&p.numRoutinesAtomic, -1) 244 } 245 246 func (p *pooledWorkerPool) getNumRoutines() int64 { 247 return atomic.LoadInt64(&p.numRoutinesAtomic) 248 } 249 250 func (p *pooledWorkerPool) emitNumWorkingRoutines() { 251 numRoutines := float64(p.getNumWorkingRoutines()) 252 p.numWorkingRoutinesGauge.Update(numRoutines) 253 } 254 255 func (p *pooledWorkerPool) incNumWorkingRoutines() { 256 atomic.AddInt64(&p.numWorkingRoutinesAtomic, 1) 257 } 258 259 func (p *pooledWorkerPool) decNumWorkingRoutines() { 260 atomic.AddInt64(&p.numWorkingRoutinesAtomic, -1) 261 } 262 263 func (p *pooledWorkerPool) getNumWorkingRoutines() int64 { 264 return atomic.LoadInt64(&p.numWorkingRoutinesAtomic) 265 }