// Source: go.temporal.io/server@v1.23.0/common/persistence/client/health_request_rate_limiter.go
//
// The MIT License
//
// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved.
//
// Copyright (c) 2020 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
24 25 package client 26 27 import ( 28 "context" 29 "encoding/json" 30 "math" 31 "sync/atomic" 32 "time" 33 34 "go.temporal.io/server/common/log" 35 "go.temporal.io/server/common/log/tag" 36 "go.temporal.io/server/common/metrics" 37 "go.temporal.io/server/common/persistence" 38 "go.temporal.io/server/common/primitives/timestamp" 39 "go.temporal.io/server/common/quotas" 40 ) 41 42 const ( 43 DefaultRefreshInterval = 10 * time.Second 44 DefaultRateBurstRatio = 1.0 45 DefaultInitialRateMultiplier = 1.0 46 ) 47 48 type ( 49 HealthRequestRateLimiterImpl struct { 50 enabled atomic.Bool 51 params DynamicRateLimitingParams // dynamic config map 52 curOptions dynamicRateLimitingOptions // current dynamic config values (updated on refresh) 53 54 rateLimiter *quotas.RateLimiterImpl 55 healthSignals persistence.HealthSignalAggregator 56 57 refreshTimer *time.Ticker 58 59 rateFn quotas.RateFn 60 rateToBurstRatio float64 61 62 curRateMultiplier float64 63 64 metricsHandler metrics.Handler 65 logger log.Logger 66 } 67 68 dynamicRateLimitingOptions struct { 69 Enabled bool 70 71 RefreshInterval string // string returned by json.Unmarshal will be parsed into a duration 72 73 // thresholds which should trigger backoff if exceeded 74 LatencyThreshold float64 75 ErrorThreshold float64 76 77 // if either threshold is exceeded, the current rate multiplier will be reduced by this amount 78 RateBackoffStepSize float64 79 // when the system is healthy and current rate < max rate, the current rate multiplier will be 80 // increased by this amount 81 RateIncreaseStepSize float64 82 83 RateMultiMax float64 84 RateMultiMin float64 85 } 86 ) 87 88 var _ quotas.RequestRateLimiter = (*HealthRequestRateLimiterImpl)(nil) 89 90 func NewHealthRequestRateLimiterImpl( 91 healthSignals persistence.HealthSignalAggregator, 92 rateFn quotas.RateFn, 93 params DynamicRateLimitingParams, 94 metricsHandler metrics.Handler, 95 logger log.Logger, 96 ) *HealthRequestRateLimiterImpl { 97 limiter := 
&HealthRequestRateLimiterImpl{ 98 enabled: atomic.Bool{}, 99 rateLimiter: quotas.NewRateLimiter(rateFn(), int(DefaultRateBurstRatio*rateFn())), 100 healthSignals: healthSignals, 101 rateFn: rateFn, 102 params: params, 103 refreshTimer: time.NewTicker(DefaultRefreshInterval), 104 rateToBurstRatio: DefaultRateBurstRatio, 105 curRateMultiplier: DefaultInitialRateMultiplier, 106 metricsHandler: metricsHandler, 107 logger: logger, 108 } 109 limiter.refreshDynamicParams() 110 return limiter 111 } 112 113 func (rl *HealthRequestRateLimiterImpl) Allow(now time.Time, request quotas.Request) bool { 114 rl.maybeRefresh() 115 if !rl.enabled.Load() { 116 return true 117 } 118 return rl.rateLimiter.AllowN(now, request.Token) 119 } 120 121 func (rl *HealthRequestRateLimiterImpl) Reserve(now time.Time, request quotas.Request) quotas.Reservation { 122 rl.maybeRefresh() 123 if !rl.enabled.Load() { 124 return quotas.NoopReservation 125 } 126 return rl.rateLimiter.ReserveN(now, request.Token) 127 } 128 129 func (rl *HealthRequestRateLimiterImpl) Wait(ctx context.Context, request quotas.Request) error { 130 rl.maybeRefresh() 131 if !rl.enabled.Load() { 132 return nil 133 } 134 return rl.rateLimiter.WaitN(ctx, request.Token) 135 } 136 137 func (rl *HealthRequestRateLimiterImpl) maybeRefresh() { 138 select { 139 case <-rl.refreshTimer.C: 140 rl.refreshDynamicParams() 141 if rl.enabled.Load() { 142 rl.refreshRate() 143 } 144 rl.updateRefreshTimer() 145 146 default: 147 // no-op 148 } 149 } 150 151 func (rl *HealthRequestRateLimiterImpl) refreshRate() { 152 if rl.latencyThresholdExceeded() || rl.errorThresholdExceeded() { 153 // limit exceeded, do backoff 154 rl.curRateMultiplier = math.Max(rl.curOptions.RateMultiMin, rl.curRateMultiplier-rl.curOptions.RateBackoffStepSize) 155 rl.metricsHandler.Gauge(metrics.DynamicRateLimiterMultiplier.Name()).Record(rl.curRateMultiplier) 156 rl.logger.Info("Health threshold exceeded, reducing rate limit.", tag.NewFloat64("newMulti", 
rl.curRateMultiplier), tag.NewFloat64("newRate", rl.rateLimiter.Rate()), tag.NewFloat64("latencyAvg", rl.healthSignals.AverageLatency()), tag.NewFloat64("errorRatio", rl.healthSignals.ErrorRatio())) 157 } else if rl.curRateMultiplier < rl.curOptions.RateMultiMax { 158 // already doing backoff and under thresholds, increase limit 159 rl.curRateMultiplier = math.Min(rl.curOptions.RateMultiMax, rl.curRateMultiplier+rl.curOptions.RateIncreaseStepSize) 160 rl.metricsHandler.Gauge(metrics.DynamicRateLimiterMultiplier.Name()).Record(rl.curRateMultiplier) 161 rl.logger.Info("System healthy, increasing rate limit.", tag.NewFloat64("newMulti", rl.curRateMultiplier), tag.NewFloat64("newRate", rl.rateLimiter.Rate()), tag.NewFloat64("latencyAvg", rl.healthSignals.AverageLatency()), tag.NewFloat64("errorRatio", rl.healthSignals.ErrorRatio())) 162 } 163 // Always set rate to pickup changes to underlying rate limit dynamic config 164 rl.rateLimiter.SetRPS(rl.curRateMultiplier * rl.rateFn()) 165 rl.rateLimiter.SetBurst(int(rl.rateToBurstRatio * rl.rateFn())) 166 } 167 168 func (rl *HealthRequestRateLimiterImpl) refreshDynamicParams() { 169 var options dynamicRateLimitingOptions 170 b, err := json.Marshal(rl.params()) 171 if err != nil { 172 rl.logger.Warn("Error marshalling dynamic rate limiting params. Dynamic rate limiting is disabled.", tag.Error(err)) 173 rl.enabled.Store(false) 174 return 175 } 176 177 err = json.Unmarshal(b, &options) 178 if err != nil { 179 rl.logger.Warn("Error unmarshalling dynamic rate limiting params. 
Dynamic rate limiting is disabled.", tag.Error(err)) 180 rl.enabled.Store(false) 181 return 182 } 183 184 rl.enabled.Store(options.Enabled) 185 rl.curOptions = options 186 } 187 188 func (rl *HealthRequestRateLimiterImpl) updateRefreshTimer() { 189 if len(rl.curOptions.RefreshInterval) > 0 { 190 if refreshDuration, err := timestamp.ParseDurationDefaultSeconds(rl.curOptions.RefreshInterval); err != nil { 191 rl.logger.Warn("Error parsing dynamic rate limit refreshInterval timestamp. Using previous value.", tag.Error(err)) 192 } else { 193 rl.refreshTimer.Reset(refreshDuration) 194 } 195 } 196 } 197 198 func (rl *HealthRequestRateLimiterImpl) latencyThresholdExceeded() bool { 199 return rl.curOptions.LatencyThreshold > 0 && rl.healthSignals.AverageLatency() > rl.curOptions.LatencyThreshold 200 } 201 202 func (rl *HealthRequestRateLimiterImpl) errorThresholdExceeded() bool { 203 return rl.curOptions.ErrorThreshold > 0 && rl.healthSignals.ErrorRatio() > rl.curOptions.ErrorThreshold 204 }