k8s.io/apiserver@v0.31.1/pkg/util/flowcontrol/dropped_requests_tracker.go (about)

     1  /*
     2  Copyright 2023 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package flowcontrol
    18  
    19  import (
    20  	"sync"
    21  	"sync/atomic"
    22  	"time"
    23  
    24  	"k8s.io/utils/clock"
    25  )
    26  
    27  const (
    28  	// maxRetryAfter represents the maximum possible retryAfter.
    29  	maxRetryAfter = int64(32)
    30  )
    31  
    32  // DroppedRequestsTracker is an interface that allows tracking
    33  // a history od dropped requests in the system for the purpose
    34  // of adjusting RetryAfter header to avoid system overload.
    35  type DroppedRequestsTracker interface {
    36  	// RecordDroppedRequest records a request that was just
    37  	// dropped from processing.
    38  	RecordDroppedRequest(plName string)
    39  
    40  	// GetRetryAfter returns the current suggested value of
    41  	// RetryAfter value.
    42  	GetRetryAfter(plName string) int64
    43  }
    44  
    45  // unixStat keeps a statistic how many requests were dropped within
    46  // a single second.
    47  type unixStat struct {
    48  	unixTime int64
    49  	requests int64
    50  }
    51  
    52  type droppedRequestsStats struct {
    53  	lock sync.RWMutex
    54  
    55  	// history stores the history of dropped requests.
    56  	history []unixStat
    57  
    58  	// To reduce lock-contention, we store the information about
    59  	// the current second here, which we can then access under
    60  	// reader lock.
    61  	currentUnix  int64
    62  	currentCount atomic.Int64
    63  
    64  	retryAfter           atomic.Int64
    65  	retryAfterUpdateUnix int64
    66  }
    67  
    68  func newDroppedRequestsStats(nowUnix int64) *droppedRequestsStats {
    69  	result := &droppedRequestsStats{
    70  		// We assume that we can bump at any time after first dropped request.
    71  		retryAfterUpdateUnix: 0,
    72  	}
    73  	result.retryAfter.Store(1)
    74  	return result
    75  }
    76  
    77  func (s *droppedRequestsStats) recordDroppedRequest(unixTime int64) {
    78  	// Short path - if the current second matches passed time,
    79  	// just update the stats.
    80  	if done := func() bool {
    81  		s.lock.RLock()
    82  		defer s.lock.RUnlock()
    83  		if s.currentUnix == unixTime {
    84  			s.currentCount.Add(1)
    85  			return true
    86  		}
    87  		return false
    88  	}(); done {
    89  		return
    90  	}
    91  
    92  	// We trigger the change of <currentUnix>.
    93  	s.lock.Lock()
    94  	defer s.lock.Unlock()
    95  	if s.currentUnix == unixTime {
    96  		s.currentCount.Add(1)
    97  		return
    98  	}
    99  
   100  	s.updateHistory(s.currentUnix, s.currentCount.Load())
   101  	s.currentUnix = unixTime
   102  	s.currentCount.Store(1)
   103  
   104  	// We only consider updating retryAfter when bumping the current second.
   105  	// However, given that we didn't report anything for the current second,
   106  	// we recompute it based on statistics from the previous one.
   107  	s.updateRetryAfterIfNeededLocked(unixTime)
   108  }
   109  
   110  func (s *droppedRequestsStats) updateHistory(unixTime int64, count int64) {
   111  	s.history = append(s.history, unixStat{unixTime: unixTime, requests: count})
   112  
   113  	startIndex := 0
   114  	// Entries that exceed 2*retryAfter or maxRetryAfter are never going to be needed.
   115  	maxHistory := 2 * s.retryAfter.Load()
   116  	if maxHistory > maxRetryAfter {
   117  		maxHistory = maxRetryAfter
   118  	}
   119  	for ; startIndex < len(s.history) && unixTime-s.history[startIndex].unixTime > maxHistory; startIndex++ {
   120  	}
   121  	if startIndex > 0 {
   122  		s.history = s.history[startIndex:]
   123  	}
   124  }
   125  
   126  // updateRetryAfterIfNeededLocked updates the retryAfter based on the number of
   127  // dropped requests in the last `retryAfter` seconds:
   128  //   - if there were less than `retryAfter` dropped requests, it decreases
   129  //     retryAfter
   130  //   - if there were at least 3*`retryAfter` dropped requests, it increases
   131  //     retryAfter
   132  //
   133  // The rationale behind these numbers being fairly low is that APF is queuing
   134  // requests and rejecting (dropping) them is a last resort, which is not expected
   135  // unless a given priority level is actually overloaded.
   136  //
   137  // Additionally, we rate-limit the increases of retryAfter to wait at least
   138  // `retryAfter' seconds after the previous increase to avoid multiple bumps
   139  // on a single spike.
   140  //
   141  // We're working with the interval [unixTime-retryAfter, unixTime).
   142  func (s *droppedRequestsStats) updateRetryAfterIfNeededLocked(unixTime int64) {
   143  	retryAfter := s.retryAfter.Load()
   144  
   145  	droppedRequests := int64(0)
   146  	for i := len(s.history) - 1; i >= 0; i-- {
   147  		if unixTime-s.history[i].unixTime > retryAfter {
   148  			break
   149  		}
   150  		if s.history[i].unixTime < unixTime {
   151  			droppedRequests += s.history[i].requests
   152  		}
   153  	}
   154  
   155  	if unixTime-s.retryAfterUpdateUnix >= retryAfter && droppedRequests >= 3*retryAfter {
   156  		// We try to mimic the TCP algorithm and thus are doubling
   157  		// the retryAfter here.
   158  		retryAfter *= 2
   159  		if retryAfter >= maxRetryAfter {
   160  			retryAfter = maxRetryAfter
   161  		}
   162  		s.retryAfter.Store(retryAfter)
   163  		s.retryAfterUpdateUnix = unixTime
   164  		return
   165  	}
   166  
   167  	if droppedRequests < retryAfter && retryAfter > 1 {
   168  		// We try to mimc the TCP algorithm and thus are linearly
   169  		// scaling down the retryAfter here.
   170  		retryAfter--
   171  		s.retryAfter.Store(retryAfter)
   172  		return
   173  	}
   174  }
   175  
   176  // droppedRequestsTracker implement DroppedRequestsTracker interface
   177  // for the purpose of adjusting RetryAfter header for newly dropped
   178  // requests to avoid system overload.
   179  type droppedRequestsTracker struct {
   180  	now func() time.Time
   181  
   182  	lock    sync.RWMutex
   183  	plStats map[string]*droppedRequestsStats
   184  }
   185  
   186  // NewDroppedRequestsTracker is creating a new instance of
   187  // DroppedRequestsTracker.
   188  func NewDroppedRequestsTracker() DroppedRequestsTracker {
   189  	return newDroppedRequestsTracker(clock.RealClock{}.Now)
   190  }
   191  
   192  func newDroppedRequestsTracker(now func() time.Time) *droppedRequestsTracker {
   193  	return &droppedRequestsTracker{
   194  		now:     now,
   195  		plStats: make(map[string]*droppedRequestsStats),
   196  	}
   197  }
   198  
   199  func (t *droppedRequestsTracker) RecordDroppedRequest(plName string) {
   200  	unixTime := t.now().Unix()
   201  
   202  	stats := func() *droppedRequestsStats {
   203  		// The list of priority levels should change very infrequently,
   204  		// so in almost all cases, the fast path should be enough.
   205  		t.lock.RLock()
   206  		if plStats, ok := t.plStats[plName]; ok {
   207  			t.lock.RUnlock()
   208  			return plStats
   209  		}
   210  		t.lock.RUnlock()
   211  
   212  		// Slow path taking writer lock to update the map.
   213  		t.lock.Lock()
   214  		defer t.lock.Unlock()
   215  		if plStats, ok := t.plStats[plName]; ok {
   216  			return plStats
   217  		}
   218  		stats := newDroppedRequestsStats(unixTime)
   219  		t.plStats[plName] = stats
   220  		return stats
   221  	}()
   222  
   223  	stats.recordDroppedRequest(unixTime)
   224  }
   225  
   226  func (t *droppedRequestsTracker) GetRetryAfter(plName string) int64 {
   227  	t.lock.RLock()
   228  	defer t.lock.RUnlock()
   229  
   230  	if plStats, ok := t.plStats[plName]; ok {
   231  		return plStats.retryAfter.Load()
   232  	}
   233  	return 1
   234  }