go.uber.org/cadence@v1.2.9/internal/internal_poller_autoscaler.go

// Copyright (c) 2017-2021 Uber Technologies Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package internal

import (
	"context"
	"errors"
	"sync"
	"time"

	"github.com/marusama/semaphore/v2"
	"go.uber.org/atomic"
	"go.uber.org/zap"

	"go.uber.org/cadence/internal/common/autoscaler"
)

// Default tuning values for the poller autoscaler.
const (
	defaultPollerAutoScalerCooldown          = time.Minute
	defaultPollerAutoScalerTargetUtilization = 0.6
	defaultMinConcurrentPollerSize           = 1
)

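// Compile-time checks that pollerAutoScaler and pollerUsageEstimator implement
// the autoscaler interfaces.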
var (
	_ autoscaler.AutoScaler = (*pollerAutoScaler)(nil)
	_ autoscaler.Estimator  = (*pollerUsageEstimator)(nil)
)

type (
	pollerAutoScaler struct {
		pollerUsageEstimator

		isDryRun     bool
		cooldownTime time.Duration
		logger       *zap.Logger
		sem          semaphore.Semaphore // resizable semaphore to control number of concurrent pollers
		ctx          context.Context
		cancel       context.CancelFunc
		wg           *sync.WaitGroup // graceful stop
		recommender  autoscaler.Recommender
		onAutoScale  []func() // hook functions that run post autoscale
	}

	pollerUsageEstimator struct {
		// This single atomic variable stores two variables:
		// left 32 bits is noTaskCounts, right 32 bits is taskCounts.
		// This avoids unnecessary usage of CompareAndSwap
		atomicBits *atomic.Uint64
	}

	pollerAutoScalerOptions struct {
		Enabled           bool
		InitCount         int
		MinCount          int
		MaxCount          int
		Cooldown          time.Duration
		DryRun            bool
		TargetUtilization float64
	}
)

func newPollerScaler(
	options pollerAutoScalerOptions,
	logger *zap.Logger,
	hooks ...func()) *pollerAutoScaler {
	if !options.Enabled {
		return nil
	}
	ctx, cancel := context.WithCancel(context.Background())

	return &pollerAutoScaler{
		isDryRun:             options.DryRun,
		cooldownTime:         options.Cooldown,
		logger:               logger,
		sem:                  semaphore.New(options.InitCount),
		wg:                   &sync.WaitGroup{},
		ctx:                  ctx,
		cancel:               cancel,
		pollerUsageEstimator: pollerUsageEstimator{atomicBits: atomic.NewUint64(0)},
		recommender: autoscaler.NewLinearRecommender(
			autoscaler.ResourceUnit(options.MinCount),
			autoscaler.ResourceUnit(options.MaxCount),
			autoscaler.Usages{
				autoscaler.PollerUtilizationRate: autoscaler.MilliUsage(options.TargetUtilization * 1000),
			},
		),
		onAutoScale: hooks,
	}
}
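
// Illustrative sketch only (not the production wiring): how a caller might
// construct and run the autoscaler. The option values below are arbitrary
// examples; the real values come from the worker configuration.
//
//	scaler := newPollerScaler(pollerAutoScalerOptions{
//		Enabled:           true,
//		InitCount:         2,
//		MinCount:          defaultMinConcurrentPollerSize,
//		MaxCount:          10,
//		Cooldown:          defaultPollerAutoScalerCooldown,
//		TargetUtilization: defaultPollerAutoScalerTargetUtilization,
//	}, zap.NewNop())
//	if scaler != nil {
//		scaler.Start()
//		defer scaler.Stop()
//		// a poll loop would typically bracket each poll with Acquire/Release
//		// and feed poll results to CollectUsage.
//	}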

// Acquire acquires a concurrent poll quota
func (p *pollerAutoScaler) Acquire(resource autoscaler.ResourceUnit) error {
	return p.sem.Acquire(p.ctx, int(resource))
}

// Release releases a concurrent poll quota
func (p *pollerAutoScaler) Release(resource autoscaler.ResourceUnit) {
	p.sem.Release(int(resource))
}

// GetCurrent returns the current poll quota
func (p *pollerAutoScaler) GetCurrent() autoscaler.ResourceUnit {
	return autoscaler.ResourceUnit(p.sem.GetLimit())
}

// Start spawns the autoscaler goroutine; call Stop to terminate it.
func (p *pollerAutoScaler) Start() {
	logger := p.logger.Sugar()
	p.wg.Add(1)
	go func() {
		defer p.wg.Done()
		for {
			select {
			case <-p.ctx.Done():
				return
			case <-time.After(p.cooldownTime):
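				// a scaling decision is evaluated at most once per cooldown period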
				currentResource := autoscaler.ResourceUnit(p.sem.GetLimit())
				currentUsages, err := p.pollerUsageEstimator.Estimate()
				if err != nil {
					logger.Warnw("poller autoscaler skip due to estimator error", "error", err)
					continue
				}
				proposedResource := p.recommender.Recommend(currentResource, currentUsages)
				logger.Debugw("poller autoscaler recommendation",
					"currentUsage", currentUsages,
					"current", uint64(currentResource),
					"recommend", uint64(proposedResource),
					"isDryRun", p.isDryRun)
				if !p.isDryRun {
					p.sem.SetLimit(int(proposedResource))
				}
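				// clear the counters so the next window is estimated from fresh samples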
				p.pollerUsageEstimator.Reset()

				// hooks
				for i := range p.onAutoScale {
					p.onAutoScale[i]()
				}
			}
		}
	}()
}

// Stop stops the poller autoscaler
func (p *pollerAutoScaler) Stop() {
	p.cancel()
	p.wg.Wait()
}

// Reset clears the accumulated poll counters
func (m *pollerUsageEstimator) Reset() {
	m.atomicBits.Store(0)
}

// CollectUsage counts past poll results to estimate autoscaler.Usages
func (m *pollerUsageEstimator) CollectUsage(data interface{}) error {
	isEmpty, err := isTaskEmpty(data)
	if err != nil {
		return err
	}
	if isEmpty { // no-task poll
		m.atomicBits.Add(1 << 32)
	} else {
		m.atomicBits.Add(1)
	}
	return nil
}
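
// Note on the packed counter: each no-task poll adds 1<<32 (upper 32 bits) and
// each poll that returned a task adds 1 (lower 32 bits); Estimate later splits
// the two counts apart. For example, after 3 empty polls and 1 poll with a task
// the packed value is 3<<32 + 1, and Estimate reports 1*1000/(3+1) = 250 milli,
// i.e. 25% utilization.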

func isTaskEmpty(task interface{}) (bool, error) {
	switch t := task.(type) {
	case *workflowTask:
		return t == nil || t.task == nil, nil
	case *activityTask:
		return t == nil || t.task == nil, nil
	case *localActivityTask:
		return t == nil || t.workflowTask == nil, nil
	default:
		return false, errors.New("unknown task type")
	}
}

// Estimate computes poller utilization from the poll counts gathered since the last Reset
func (m *pollerUsageEstimator) Estimate() (autoscaler.Usages, error) {
	bits := m.atomicBits.Load()
	noTaskCounts := bits >> 32           // left 32 bits
	taskCounts := bits & ((1 << 32) - 1) // right 32 bits
	if noTaskCounts+taskCounts == 0 {
		return nil, errors.New("autoscaler.Estimator::Estimate error: not enough data")
	}

	return autoscaler.Usages{
		autoscaler.PollerUtilizationRate: autoscaler.MilliUsage(taskCounts * 1000 / (noTaskCounts + taskCounts)),
	}, nil
}