go.temporal.io/server@v1.23.0/common/persistence/client/health_request_rate_limiter.go (about)

     1  // The MIT License
     2  //
     3  // Copyright (c) 2020 Temporal Technologies Inc.  All rights reserved.
     4  //
     5  // Copyright (c) 2020 Uber Technologies, Inc.
     6  //
     7  // Permission is hereby granted, free of charge, to any person obtaining a copy
     8  // of this software and associated documentation files (the "Software"), to deal
     9  // in the Software without restriction, including without limitation the rights
    10  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    11  // copies of the Software, and to permit persons to whom the Software is
    12  // furnished to do so, subject to the following conditions:
    13  //
    14  // The above copyright notice and this permission notice shall be included in
    15  // all copies or substantial portions of the Software.
    16  //
    17  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    18  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    19  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    20  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    21  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    22  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    23  // THE SOFTWARE.
    24  
    25  package client
    26  
    27  import (
    28  	"context"
    29  	"encoding/json"
    30  	"math"
    31  	"sync/atomic"
    32  	"time"
    33  
    34  	"go.temporal.io/server/common/log"
    35  	"go.temporal.io/server/common/log/tag"
    36  	"go.temporal.io/server/common/metrics"
    37  	"go.temporal.io/server/common/persistence"
    38  	"go.temporal.io/server/common/primitives/timestamp"
    39  	"go.temporal.io/server/common/quotas"
    40  )
    41  
    42  const (
    43  	DefaultRefreshInterval       = 10 * time.Second
    44  	DefaultRateBurstRatio        = 1.0
    45  	DefaultInitialRateMultiplier = 1.0
    46  )
    47  
    48  type (
    49  	HealthRequestRateLimiterImpl struct {
    50  		enabled    atomic.Bool
    51  		params     DynamicRateLimitingParams  // dynamic config map
    52  		curOptions dynamicRateLimitingOptions // current dynamic config values (updated on refresh)
    53  
    54  		rateLimiter   *quotas.RateLimiterImpl
    55  		healthSignals persistence.HealthSignalAggregator
    56  
    57  		refreshTimer *time.Ticker
    58  
    59  		rateFn           quotas.RateFn
    60  		rateToBurstRatio float64
    61  
    62  		curRateMultiplier float64
    63  
    64  		metricsHandler metrics.Handler
    65  		logger         log.Logger
    66  	}
    67  
    68  	dynamicRateLimitingOptions struct {
    69  		Enabled bool
    70  
    71  		RefreshInterval string // string returned by json.Unmarshal will be parsed into a duration
    72  
    73  		// thresholds which should trigger backoff if exceeded
    74  		LatencyThreshold float64
    75  		ErrorThreshold   float64
    76  
    77  		// if either threshold is exceeded, the current rate multiplier will be reduced by this amount
    78  		RateBackoffStepSize float64
    79  		// when the system is healthy and current rate < max rate, the current rate multiplier will be
    80  		// increased by this amount
    81  		RateIncreaseStepSize float64
    82  
    83  		RateMultiMax float64
    84  		RateMultiMin float64
    85  	}
    86  )
    87  
    88  var _ quotas.RequestRateLimiter = (*HealthRequestRateLimiterImpl)(nil)
    89  
    90  func NewHealthRequestRateLimiterImpl(
    91  	healthSignals persistence.HealthSignalAggregator,
    92  	rateFn quotas.RateFn,
    93  	params DynamicRateLimitingParams,
    94  	metricsHandler metrics.Handler,
    95  	logger log.Logger,
    96  ) *HealthRequestRateLimiterImpl {
    97  	limiter := &HealthRequestRateLimiterImpl{
    98  		enabled:           atomic.Bool{},
    99  		rateLimiter:       quotas.NewRateLimiter(rateFn(), int(DefaultRateBurstRatio*rateFn())),
   100  		healthSignals:     healthSignals,
   101  		rateFn:            rateFn,
   102  		params:            params,
   103  		refreshTimer:      time.NewTicker(DefaultRefreshInterval),
   104  		rateToBurstRatio:  DefaultRateBurstRatio,
   105  		curRateMultiplier: DefaultInitialRateMultiplier,
   106  		metricsHandler:    metricsHandler,
   107  		logger:            logger,
   108  	}
   109  	limiter.refreshDynamicParams()
   110  	return limiter
   111  }
   112  
   113  func (rl *HealthRequestRateLimiterImpl) Allow(now time.Time, request quotas.Request) bool {
   114  	rl.maybeRefresh()
   115  	if !rl.enabled.Load() {
   116  		return true
   117  	}
   118  	return rl.rateLimiter.AllowN(now, request.Token)
   119  }
   120  
   121  func (rl *HealthRequestRateLimiterImpl) Reserve(now time.Time, request quotas.Request) quotas.Reservation {
   122  	rl.maybeRefresh()
   123  	if !rl.enabled.Load() {
   124  		return quotas.NoopReservation
   125  	}
   126  	return rl.rateLimiter.ReserveN(now, request.Token)
   127  }
   128  
   129  func (rl *HealthRequestRateLimiterImpl) Wait(ctx context.Context, request quotas.Request) error {
   130  	rl.maybeRefresh()
   131  	if !rl.enabled.Load() {
   132  		return nil
   133  	}
   134  	return rl.rateLimiter.WaitN(ctx, request.Token)
   135  }
   136  
   137  func (rl *HealthRequestRateLimiterImpl) maybeRefresh() {
   138  	select {
   139  	case <-rl.refreshTimer.C:
   140  		rl.refreshDynamicParams()
   141  		if rl.enabled.Load() {
   142  			rl.refreshRate()
   143  		}
   144  		rl.updateRefreshTimer()
   145  
   146  	default:
   147  		// no-op
   148  	}
   149  }
   150  
   151  func (rl *HealthRequestRateLimiterImpl) refreshRate() {
   152  	if rl.latencyThresholdExceeded() || rl.errorThresholdExceeded() {
   153  		// limit exceeded, do backoff
   154  		rl.curRateMultiplier = math.Max(rl.curOptions.RateMultiMin, rl.curRateMultiplier-rl.curOptions.RateBackoffStepSize)
   155  		rl.metricsHandler.Gauge(metrics.DynamicRateLimiterMultiplier.Name()).Record(rl.curRateMultiplier)
   156  		rl.logger.Info("Health threshold exceeded, reducing rate limit.", tag.NewFloat64("newMulti", rl.curRateMultiplier), tag.NewFloat64("newRate", rl.rateLimiter.Rate()), tag.NewFloat64("latencyAvg", rl.healthSignals.AverageLatency()), tag.NewFloat64("errorRatio", rl.healthSignals.ErrorRatio()))
   157  	} else if rl.curRateMultiplier < rl.curOptions.RateMultiMax {
   158  		// already doing backoff and under thresholds, increase limit
   159  		rl.curRateMultiplier = math.Min(rl.curOptions.RateMultiMax, rl.curRateMultiplier+rl.curOptions.RateIncreaseStepSize)
   160  		rl.metricsHandler.Gauge(metrics.DynamicRateLimiterMultiplier.Name()).Record(rl.curRateMultiplier)
   161  		rl.logger.Info("System healthy, increasing rate limit.", tag.NewFloat64("newMulti", rl.curRateMultiplier), tag.NewFloat64("newRate", rl.rateLimiter.Rate()), tag.NewFloat64("latencyAvg", rl.healthSignals.AverageLatency()), tag.NewFloat64("errorRatio", rl.healthSignals.ErrorRatio()))
   162  	}
   163  	// Always set rate to pickup changes to underlying rate limit dynamic config
   164  	rl.rateLimiter.SetRPS(rl.curRateMultiplier * rl.rateFn())
   165  	rl.rateLimiter.SetBurst(int(rl.rateToBurstRatio * rl.rateFn()))
   166  }
   167  
   168  func (rl *HealthRequestRateLimiterImpl) refreshDynamicParams() {
   169  	var options dynamicRateLimitingOptions
   170  	b, err := json.Marshal(rl.params())
   171  	if err != nil {
   172  		rl.logger.Warn("Error marshalling dynamic rate limiting params. Dynamic rate limiting is disabled.", tag.Error(err))
   173  		rl.enabled.Store(false)
   174  		return
   175  	}
   176  
   177  	err = json.Unmarshal(b, &options)
   178  	if err != nil {
   179  		rl.logger.Warn("Error unmarshalling dynamic rate limiting params. Dynamic rate limiting is disabled.", tag.Error(err))
   180  		rl.enabled.Store(false)
   181  		return
   182  	}
   183  
   184  	rl.enabled.Store(options.Enabled)
   185  	rl.curOptions = options
   186  }
   187  
   188  func (rl *HealthRequestRateLimiterImpl) updateRefreshTimer() {
   189  	if len(rl.curOptions.RefreshInterval) > 0 {
   190  		if refreshDuration, err := timestamp.ParseDurationDefaultSeconds(rl.curOptions.RefreshInterval); err != nil {
   191  			rl.logger.Warn("Error parsing dynamic rate limit refreshInterval timestamp. Using previous value.", tag.Error(err))
   192  		} else {
   193  			rl.refreshTimer.Reset(refreshDuration)
   194  		}
   195  	}
   196  }
   197  
   198  func (rl *HealthRequestRateLimiterImpl) latencyThresholdExceeded() bool {
   199  	return rl.curOptions.LatencyThreshold > 0 && rl.healthSignals.AverageLatency() > rl.curOptions.LatencyThreshold
   200  }
   201  
   202  func (rl *HealthRequestRateLimiterImpl) errorThresholdExceeded() bool {
   203  	return rl.curOptions.ErrorThreshold > 0 && rl.healthSignals.ErrorRatio() > rl.curOptions.ErrorThreshold
   204  }