go.uber.org/cadence@v1.2.9/internal/internal_poller_autoscaler.go

// Copyright (c) 2017-2021 Uber Technologies Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package internal

import (
	"context"
	"errors"
	"sync"
	"time"

	"github.com/marusama/semaphore/v2"
	"go.uber.org/atomic"
	"go.uber.org/zap"

	"go.uber.org/cadence/internal/common/autoscaler"
)

// Default tuning values for the poller autoscaler.
const (
	defaultPollerAutoScalerCooldown          = time.Minute
	defaultPollerAutoScalerTargetUtilization = 0.6
	defaultMinConcurrentPollerSize           = 1
)

var (
	_ autoscaler.AutoScaler = (*pollerAutoScaler)(nil)
	_ autoscaler.Estimator  = (*pollerUsageEstimator)(nil)
)

type (
	pollerAutoScaler struct {
		pollerUsageEstimator

		isDryRun     bool
		cooldownTime time.Duration
		logger       *zap.Logger
		sem          semaphore.Semaphore // resizable semaphore to control number of concurrent pollers
		ctx          context.Context
		cancel       context.CancelFunc
		wg           *sync.WaitGroup // graceful stop
		recommender  autoscaler.Recommender
		onAutoScale  []func() // hook functions that run post autoscale
	}

	pollerUsageEstimator struct {
		// This single atomic variable packs two counters:
		// the left (high) 32 bits are noTaskCounts, the right (low) 32 bits are taskCounts.
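		// For example, 3 no-task polls followed by 5 polls that returned tasks
		// yields atomicBits == (3 << 32) | 5.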
		// This avoids unnecessary use of CompareAndSwap.
		atomicBits *atomic.Uint64
	}

	pollerAutoScalerOptions struct {
		Enabled           bool
		InitCount         int
		MinCount          int
		MaxCount          int
		Cooldown          time.Duration
		DryRun            bool
		TargetUtilization float64
	}
)

func newPollerScaler(
	options pollerAutoScalerOptions,
	logger *zap.Logger,
	hooks ...func()) *pollerAutoScaler {
	if !options.Enabled {
		return nil
	}
	ctx, cancel := context.WithCancel(context.Background())

	return &pollerAutoScaler{
		isDryRun:             options.DryRun,
		cooldownTime:         options.Cooldown,
		logger:               logger,
		sem:                  semaphore.New(options.InitCount),
		wg:                   &sync.WaitGroup{},
		ctx:                  ctx,
		cancel:               cancel,
		pollerUsageEstimator: pollerUsageEstimator{atomicBits: atomic.NewUint64(0)},
		recommender: autoscaler.NewLinearRecommender(
			autoscaler.ResourceUnit(options.MinCount),
			autoscaler.ResourceUnit(options.MaxCount),
			autoscaler.Usages{
				autoscaler.PollerUtilizationRate: autoscaler.MilliUsage(options.TargetUtilization * 1000),
			},
		),
		onAutoScale: hooks,
	}
}

// Acquire concurrent poll quota
func (p *pollerAutoScaler) Acquire(resource autoscaler.ResourceUnit) error {
	return p.sem.Acquire(p.ctx, int(resource))
}

// Release concurrent poll quota
func (p *pollerAutoScaler) Release(resource autoscaler.ResourceUnit) {
	p.sem.Release(int(resource))
}

// GetCurrent returns the current poll quota
func (p *pollerAutoScaler) GetCurrent() autoscaler.ResourceUnit {
	return autoscaler.ResourceUnit(p.sem.GetLimit())
}

// Start launches the auto-scaler goroutine; call Stop to terminate it
func (p *pollerAutoScaler) Start() {
	logger := p.logger.Sugar()
	p.wg.Add(1)
	go func() {
		defer p.wg.Done()
		for {
			select {
			case <-p.ctx.Done():
				return
			case <-time.After(p.cooldownTime):
				currentResource := autoscaler.ResourceUnit(p.sem.GetLimit())
				currentUsages, err := p.pollerUsageEstimator.Estimate()
				if err != nil {
					logger.Warnw("poller autoscaler skip due to estimator error", "error", err)
					continue
				}
				proposedResource := p.recommender.Recommend(currentResource, currentUsages)
				logger.Debugw("poller autoscaler recommendation",
					"currentUsage", currentUsages,
					"current", uint64(currentResource),
					"recommend", uint64(proposedResource),
					"isDryRun", p.isDryRun)
				if !p.isDryRun {
					p.sem.SetLimit(int(proposedResource))
				}
				p.pollerUsageEstimator.Reset()

				// run post-autoscale hooks
				for i := range p.onAutoScale {
					p.onAutoScale[i]()
				}
			}
		}
	}()
}

// Stop stops the poller autoscaler and waits for the goroutine to exit
func (p *pollerAutoScaler) Stop() {
	p.cancel()
	p.wg.Wait()
}

// Reset clears the collected poll counts
func (m *pollerUsageEstimator) Reset() {
	m.atomicBits.Store(0)
}

// CollectUsage counts past poll results to estimate autoscaler.Usages
func (m *pollerUsageEstimator) CollectUsage(data interface{}) error {
	isEmpty, err := isTaskEmpty(data)
	if err != nil {
		return err
	}
	if isEmpty { // no-task poll
		m.atomicBits.Add(1 << 32)
	} else {
		m.atomicBits.Add(1)
	}
	return nil
}

func isTaskEmpty(task interface{}) (bool, error) {
	switch t := task.(type) {
	case *workflowTask:
		return t == nil || t.task == nil, nil
	case *activityTask:
		return t == nil || t.task == nil, nil
	case *localActivityTask:
		return t == nil || t.workflowTask == nil, nil
	default:
		return false, errors.New("unknown task type")
	}
}

// Estimate is based on past poll counts
func (m *pollerUsageEstimator) Estimate() (autoscaler.Usages, error) {
	bits := m.atomicBits.Load()
	noTaskCounts := bits >> 32           // left 32 bits
	taskCounts := bits & ((1 << 32) - 1) // right 32 bits
	if noTaskCounts+taskCounts == 0 {
		return nil, errors.New("autoscaler.Estimator::Estimate error: not enough data")
	}

	return autoscaler.Usages{
		autoscaler.PollerUtilizationRate: autoscaler.MilliUsage(taskCounts * 1000 / (noTaskCounts + taskCounts)),
	}, nil
}
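
// Illustrative usage sketch (not part of this file): assuming a worker owns a
// *pollerAutoScaler named scaler, a poll loop would typically acquire one unit
// of quota, poll, feed the result to the estimator, and release the unit.
// pollTask and dispatchTask below are hypothetical stand-ins for the worker's
// own poll and dispatch logic.
//
//	scaler.Start()
//	defer scaler.Stop()
//	for {
//		if err := scaler.Acquire(1); err != nil {
//			break // scaler context canceled via Stop()
//		}
//		task := pollTask()            // may be an empty (no-task) poll
//		_ = scaler.CollectUsage(task) // counts task vs. no-task polls
//		scaler.Release(1)
//		dispatchTask(task)
//	}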