github.com/letsencrypt/boulder@v0.20251208.0/ratelimits/limiter.go (about) 1 package ratelimits 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "math" 8 "math/rand/v2" 9 "slices" 10 "strings" 11 "time" 12 13 "github.com/jmhodges/clock" 14 "github.com/prometheus/client_golang/prometheus" 15 "github.com/prometheus/client_golang/prometheus/promauto" 16 17 berrors "github.com/letsencrypt/boulder/errors" 18 ) 19 20 const ( 21 // Allowed is used for rate limit metrics, it's the value of the 'decision' 22 // label when a request was allowed. 23 Allowed = "allowed" 24 25 // Denied is used for rate limit metrics, it's the value of the 'decision' 26 // label when a request was denied. 27 Denied = "denied" 28 ) 29 30 // allowedDecision is an "allowed" *Decision that should be returned when a 31 // checked limit is found to be disabled. 32 var allowedDecision = &Decision{allowed: true, remaining: math.MaxInt64} 33 34 // Limiter provides a high-level interface for rate limiting requests by 35 // utilizing a token bucket-style approach. 36 type Limiter struct { 37 // source is used to store buckets. It must be safe for concurrent use. 38 source Source 39 clk clock.Clock 40 41 spendLatency *prometheus.HistogramVec 42 } 43 44 // NewLimiter returns a new *Limiter. The provided source must be safe for 45 // concurrent use. 46 func NewLimiter(clk clock.Clock, source Source, stats prometheus.Registerer) (*Limiter, error) { 47 spendLatency := promauto.With(stats).NewHistogramVec(prometheus.HistogramOpts{ 48 Name: "ratelimits_spend_latency", 49 Help: fmt.Sprintf("Latency of ratelimit checks labeled by limit=[name] and decision=[%s|%s], in seconds", Allowed, Denied), 50 // Exponential buckets ranging from 0.0005s to 3s. 51 Buckets: prometheus.ExponentialBuckets(0.0005, 3, 8), 52 }, []string{"limit", "decision"}) 53 54 return &Limiter{ 55 source: source, 56 clk: clk, 57 spendLatency: spendLatency, 58 }, nil 59 } 60 61 // Decision represents the result of a rate limit check or spend operation. To 62 // check the result of a *Decision, call the Result() method. 63 type Decision struct { 64 // allowed is true if the bucket possessed enough capacity to allow the 65 // request given the cost. 66 allowed bool 67 68 // remaining is the number of requests the client is allowed to make before 69 // they're rate limited. 70 remaining int64 71 72 // retryIn is the duration the client MUST wait before they're allowed to 73 // make a request. 74 retryIn time.Duration 75 76 // resetIn is the duration the bucket will take to refill to its maximum 77 // capacity, assuming no further requests are made. 78 resetIn time.Duration 79 80 // newTAT indicates the time at which the bucket will be full. It is the 81 // theoretical arrival time (TAT) of next request. It must be no more than 82 // (burst * (period / count)) in the future at any single point in time. 83 newTAT time.Time 84 85 // transaction is the Transaction that resulted in this Decision. It is 86 // included for the production of verbose Subscriber-facing errors. It is 87 // set by the Limiter before returning the Decision. 88 transaction Transaction 89 } 90 91 // Result translates a denied *Decision into a berrors.RateLimitError for the 92 // Subscriber, or returns nil if the *Decision allows the request. The error 93 // message includes a human-readable description of the exceeded rate limit and 94 // a retry-after timestamp. 95 func (d *Decision) Result(now time.Time) error { 96 if d.allowed { 97 return nil 98 } 99 100 // Add 0-3% jitter to the RetryIn duration to prevent thundering herd. 101 jitter := time.Duration(float64(d.retryIn) * 0.03 * rand.Float64()) 102 retryAfter := d.retryIn + jitter 103 retryAfterTs := now.UTC().Add(retryAfter).Format("2006-01-02 15:04:05 MST") 104 105 // There is no case for FailedAuthorizationsForPausingPerDomainPerAccount 106 // because the RA will pause clients who exceed that ratelimit. 107 switch d.transaction.limit.Name { 108 case NewRegistrationsPerIPAddress: 109 return berrors.RegistrationsPerIPAddressError( 110 retryAfter, 111 "too many new registrations (%d) from this IP address in the last %s, retry after %s", 112 d.transaction.limit.Burst, 113 d.transaction.limit.Period.Duration, 114 retryAfterTs, 115 ) 116 117 case NewRegistrationsPerIPv6Range: 118 return berrors.RegistrationsPerIPv6RangeError( 119 retryAfter, 120 "too many new registrations (%d) from this /48 subnet of IPv6 addresses in the last %s, retry after %s", 121 d.transaction.limit.Burst, 122 d.transaction.limit.Period.Duration, 123 retryAfterTs, 124 ) 125 case NewOrdersPerAccount: 126 return berrors.NewOrdersPerAccountError( 127 retryAfter, 128 "too many new orders (%d) from this account in the last %s, retry after %s", 129 d.transaction.limit.Burst, 130 d.transaction.limit.Period.Duration, 131 retryAfterTs, 132 ) 133 134 case FailedAuthorizationsPerDomainPerAccount: 135 // Uses bucket key 'enum:regId:identValue'. 136 idx := strings.LastIndex(d.transaction.bucketKey, ":") 137 if idx == -1 { 138 return berrors.InternalServerError("unrecognized bucket key while generating error") 139 } 140 identValue := d.transaction.bucketKey[idx+1:] 141 return berrors.FailedAuthorizationsPerDomainPerAccountError( 142 retryAfter, 143 "too many failed authorizations (%d) for %q in the last %s, retry after %s", 144 d.transaction.limit.Burst, 145 identValue, 146 d.transaction.limit.Period.Duration, 147 retryAfterTs, 148 ) 149 150 case CertificatesPerDomain, CertificatesPerDomainPerAccount: 151 // Uses bucket key 'enum:domainOrCIDR' or 'enum:regId:domainOrCIDR' respectively. 152 idx := strings.LastIndex(d.transaction.bucketKey, ":") 153 if idx == -1 { 154 return berrors.InternalServerError("unrecognized bucket key while generating error") 155 } 156 domainOrCIDR := d.transaction.bucketKey[idx+1:] 157 return berrors.CertificatesPerDomainError( 158 retryAfter, 159 "too many certificates (%d) already issued for %q in the last %s, retry after %s", 160 d.transaction.limit.Burst, 161 domainOrCIDR, 162 d.transaction.limit.Period.Duration, 163 retryAfterTs, 164 ) 165 166 case CertificatesPerFQDNSet: 167 return berrors.CertificatesPerFQDNSetError( 168 retryAfter, 169 "too many certificates (%d) already issued for this exact set of identifiers in the last %s, retry after %s", 170 d.transaction.limit.Burst, 171 d.transaction.limit.Period.Duration, 172 retryAfterTs, 173 ) 174 175 case LimitOverrideRequestsPerIPAddress: 176 return berrors.LimitOverrideRequestsPerIPAddressError( 177 retryAfter, 178 "too many override request form submissions (%d) from this IP address in the last %s, retry after %s", 179 d.transaction.limit.Burst, 180 d.transaction.limit.Period.Duration, 181 retryAfterTs, 182 ) 183 184 default: 185 return berrors.InternalServerError("cannot generate error for unknown rate limit") 186 } 187 } 188 189 // Check DOES NOT deduct the cost of the request from the provided bucket's 190 // capacity. The returned *Decision indicates whether the capacity exists to 191 // satisfy the cost and represents the hypothetical state of the bucket IF the 192 // cost WERE to be deducted. If no bucket exists it will NOT be created. No 193 // state is persisted to the underlying datastore. 194 func (l *Limiter) Check(ctx context.Context, txn Transaction) (*Decision, error) { 195 if txn.allowOnly() { 196 return allowedDecision, nil 197 } 198 // Remove cancellation from the request context so that transactions are not 199 // interrupted by a client disconnect. 200 ctx = context.WithoutCancel(ctx) 201 tat, err := l.source.Get(ctx, txn.bucketKey) 202 if err != nil { 203 if !errors.Is(err, ErrBucketNotFound) { 204 return nil, err 205 } 206 // First request from this client. No need to initialize the bucket 207 // because this is a check, not a spend. A TAT of "now" is equivalent to 208 // a full bucket. 209 return maybeSpend(l.clk, txn, l.clk.Now()), nil 210 } 211 return maybeSpend(l.clk, txn, tat), nil 212 } 213 214 // Spend attempts to deduct the cost from the provided bucket's capacity. The 215 // returned *Decision indicates whether the capacity existed to satisfy the cost 216 // and represents the current state of the bucket. If no bucket exists it WILL 217 // be created WITH the cost factored into its initial state. The new bucket 218 // state is persisted to the underlying datastore, if applicable, before 219 // returning. 220 func (l *Limiter) Spend(ctx context.Context, txn Transaction) (*Decision, error) { 221 return l.BatchSpend(ctx, []Transaction{txn}) 222 } 223 224 func prepareBatch(txns []Transaction) ([]Transaction, []string, error) { 225 var bucketKeys []string 226 var transactions []Transaction 227 for _, txn := range txns { 228 if txn.allowOnly() { 229 // Ignore allow-only transactions. 230 continue 231 } 232 if slices.Contains(bucketKeys, txn.bucketKey) { 233 return nil, nil, fmt.Errorf("found duplicate bucket %q in batch", txn.bucketKey) 234 } 235 bucketKeys = append(bucketKeys, txn.bucketKey) 236 transactions = append(transactions, txn) 237 } 238 return transactions, bucketKeys, nil 239 } 240 241 func stricter(existing *Decision, incoming *Decision) *Decision { 242 if existing.retryIn == incoming.retryIn { 243 if existing.remaining < incoming.remaining { 244 return existing 245 } 246 return incoming 247 } 248 if existing.retryIn > incoming.retryIn { 249 return existing 250 } 251 return incoming 252 } 253 254 // BatchSpend attempts to deduct the costs from the provided buckets' 255 // capacities. If applicable, new bucket states are persisted to the underlying 256 // datastore before returning. Non-existent buckets will be initialized WITH the 257 // cost factored into the initial state. The returned *Decision represents the 258 // strictest of all *Decisions reached in the batch. 259 func (l *Limiter) BatchSpend(ctx context.Context, txns []Transaction) (*Decision, error) { 260 start := l.clk.Now() 261 262 batch, bucketKeys, err := prepareBatch(txns) 263 if err != nil { 264 return nil, err 265 } 266 if len(batch) == 0 { 267 // All Transactions were allow-only. 268 return allowedDecision, nil 269 } 270 271 // Remove cancellation from the request context so that transactions are not 272 // interrupted by a client disconnect. 273 ctx = context.WithoutCancel(ctx) 274 tats, err := l.source.BatchGet(ctx, bucketKeys) 275 if err != nil { 276 return nil, fmt.Errorf("batch get for %d keys: %w", len(bucketKeys), err) 277 } 278 batchDecision := allowedDecision 279 newBuckets := make(map[string]time.Time) 280 incrBuckets := make(map[string]increment) 281 staleBuckets := make(map[string]time.Time) 282 txnOutcomes := make(map[Transaction]string) 283 284 for _, txn := range batch { 285 storedTAT, bucketExists := tats[txn.bucketKey] 286 d := maybeSpend(l.clk, txn, storedTAT) 287 288 if d.allowed && (storedTAT != d.newTAT) && txn.spend { 289 if !bucketExists { 290 newBuckets[txn.bucketKey] = d.newTAT 291 } else if storedTAT.After(l.clk.Now()) { 292 incrBuckets[txn.bucketKey] = increment{ 293 cost: time.Duration(txn.cost * txn.limit.emissionInterval), 294 ttl: time.Duration(txn.limit.burstOffset), 295 } 296 } else { 297 staleBuckets[txn.bucketKey] = d.newTAT 298 } 299 } 300 301 if !txn.spendOnly() { 302 // Spend-only Transactions are best-effort and do not contribute to 303 // the batchDecision. 304 batchDecision = stricter(batchDecision, d) 305 } 306 307 txnOutcomes[txn] = Denied 308 if d.allowed { 309 txnOutcomes[txn] = Allowed 310 } 311 } 312 313 if batchDecision.allowed { 314 if len(newBuckets) > 0 { 315 // Use BatchSetNotExisting to create new buckets so that we detect 316 // if concurrent requests have created this bucket at the same time, 317 // which would result in overwriting if we used a plain "SET" 318 // command. If that happens, fall back to incrementing. 319 alreadyExists, err := l.source.BatchSetNotExisting(ctx, newBuckets) 320 if err != nil { 321 return nil, fmt.Errorf("batch set for %d keys: %w", len(newBuckets), err) 322 } 323 // Find the original transaction in order to compute the increment 324 // and set the TTL. 325 for _, txn := range batch { 326 if alreadyExists[txn.bucketKey] { 327 incrBuckets[txn.bucketKey] = increment{ 328 cost: time.Duration(txn.cost * txn.limit.emissionInterval), 329 ttl: time.Duration(txn.limit.burstOffset), 330 } 331 } 332 } 333 } 334 335 if len(incrBuckets) > 0 { 336 err = l.source.BatchIncrement(ctx, incrBuckets) 337 if err != nil { 338 return nil, fmt.Errorf("batch increment for %d keys: %w", len(incrBuckets), err) 339 } 340 } 341 342 if len(staleBuckets) > 0 { 343 // Incrementing a TAT in the past grants unintended burst capacity. 344 // So instead we overwrite it with a TAT of now + increment. This 345 // approach may cause a race condition where only the last spend is 346 // saved, but it's preferable to the alternative. 347 err = l.source.BatchSet(ctx, staleBuckets) 348 if err != nil { 349 return nil, fmt.Errorf("batch set for %d keys: %w", len(staleBuckets), err) 350 } 351 } 352 } 353 354 // Observe latency equally across all transactions in the batch. 355 totalLatency := l.clk.Since(start) 356 perTxnLatency := totalLatency / time.Duration(len(txnOutcomes)) 357 for txn, outcome := range txnOutcomes { 358 l.spendLatency.WithLabelValues(txn.limit.Name.String(), outcome).Observe(perTxnLatency.Seconds()) 359 } 360 return batchDecision, nil 361 } 362 363 // Refund attempts to refund all of the cost to the capacity of the specified 364 // bucket. The returned *Decision indicates whether the refund was successful 365 // and represents the current state of the bucket. The new bucket state is 366 // persisted to the underlying datastore, if applicable, before returning. If no 367 // bucket exists it will NOT be created. Spend-only Transactions are assumed to 368 // be refundable. Check-only Transactions are never refunded. 369 // 370 // Note: The amount refunded cannot cause the bucket to exceed its maximum 371 // capacity. Partial refunds are allowed and are considered successful. For 372 // instance, if a bucket has a maximum capacity of 10 and currently has 5 373 // requests remaining, a refund request of 7 will result in the bucket reaching 374 // its maximum capacity of 10, not 12. 375 func (l *Limiter) Refund(ctx context.Context, txn Transaction) (*Decision, error) { 376 return l.BatchRefund(ctx, []Transaction{txn}) 377 } 378 379 // BatchRefund attempts to refund all or some of the costs to the provided 380 // buckets' capacities. Non-existent buckets will NOT be initialized. The new 381 // bucket state is persisted to the underlying datastore, if applicable, before 382 // returning. Spend-only Transactions are assumed to be refundable. Check-only 383 // Transactions are never refunded. The returned *Decision represents the 384 // strictest of all *Decisions reached in the batch. 385 func (l *Limiter) BatchRefund(ctx context.Context, txns []Transaction) (*Decision, error) { 386 batch, bucketKeys, err := prepareBatch(txns) 387 if err != nil { 388 return nil, err 389 } 390 if len(batch) == 0 { 391 // All Transactions were allow-only. 392 return allowedDecision, nil 393 } 394 395 // Remove cancellation from the request context so that transactions are not 396 // interrupted by a client disconnect. 397 ctx = context.WithoutCancel(ctx) 398 tats, err := l.source.BatchGet(ctx, bucketKeys) 399 if err != nil { 400 return nil, fmt.Errorf("batch get for %d keys: %w", len(bucketKeys), err) 401 } 402 403 batchDecision := allowedDecision 404 incrBuckets := make(map[string]increment) 405 406 for _, txn := range batch { 407 tat, bucketExists := tats[txn.bucketKey] 408 if !bucketExists { 409 // Ignore non-existent bucket. 410 continue 411 } 412 413 if txn.checkOnly() { 414 // The cost of check-only transactions are never refunded. 415 txn.cost = 0 416 } 417 d := maybeRefund(l.clk, txn, tat) 418 batchDecision = stricter(batchDecision, d) 419 if d.allowed && tat != d.newTAT { 420 // New bucket state should be persisted. 421 incrBuckets[txn.bucketKey] = increment{ 422 cost: time.Duration(-txn.cost * txn.limit.emissionInterval), 423 ttl: time.Duration(txn.limit.burstOffset), 424 } 425 } 426 } 427 428 if len(incrBuckets) > 0 { 429 err = l.source.BatchIncrement(ctx, incrBuckets) 430 if err != nil { 431 return nil, fmt.Errorf("batch increment for %d keys: %w", len(incrBuckets), err) 432 } 433 } 434 return batchDecision, nil 435 } 436 437 // BatchReset resets the specified buckets to their maximum capacity using the 438 // provided reset Transactions. The new bucket state is persisted to the 439 // underlying datastore before returning. 440 func (l *Limiter) BatchReset(ctx context.Context, txns []Transaction) error { 441 var bucketKeys []string 442 for _, txn := range txns { 443 if txn.allowOnly() { 444 // Ignore allow-only transactions. 445 continue 446 } 447 if !txn.resetOnly() { 448 return fmt.Errorf("found reset-only transaction, received check=%t spend=%t reset=%t", txn.check, txn.spend, txn.reset) 449 } 450 if slices.Contains(bucketKeys, txn.bucketKey) { 451 return fmt.Errorf("found duplicate bucket %q in batch", txn.bucketKey) 452 } 453 bucketKeys = append(bucketKeys, txn.bucketKey) 454 } 455 if len(bucketKeys) == 0 { 456 return nil 457 } 458 // Remove cancellation from the request context so that transactions are not 459 // interrupted by a client disconnect. 460 ctx = context.WithoutCancel(ctx) 461 return l.source.BatchDelete(ctx, bucketKeys) 462 }