k8s.io/apiserver@v0.31.1/pkg/util/flowcontrol/request/mutating_work_estimator.go

k8s.io/apiserver@v0.31.1/pkg/util/flowcontrol/request/mutating_work_estimator.go (about)

     1  /*
     2  Copyright 2021 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package request
    18  
    19  import (
    20  	"math"
    21  	"net/http"
    22  	"time"
    23  
    24  	apirequest "k8s.io/apiserver/pkg/endpoints/request"
    25  	"k8s.io/apiserver/pkg/util/flowcontrol/metrics"
    26  )
    27  
    28  func newMutatingWorkEstimator(countFn watchCountGetterFunc, config *WorkEstimatorConfig, maxSeatsFn maxSeatsFunc) WorkEstimatorFunc {
    29  	estimator := &mutatingWorkEstimator{
    30  		config:     config,
    31  		countFn:    countFn,
    32  		maxSeatsFn: maxSeatsFn,
    33  	}
    34  	return estimator.estimate
    35  }
    36  
    37  type mutatingWorkEstimator struct {
    38  	config     *WorkEstimatorConfig
    39  	countFn    watchCountGetterFunc
    40  	maxSeatsFn maxSeatsFunc
    41  }
    42  
    43  func (e *mutatingWorkEstimator) estimate(r *http.Request, flowSchemaName, priorityLevelName string) WorkEstimate {
    44  	minSeats := e.config.MinimumSeats
    45  	maxSeats := e.maxSeatsFn(priorityLevelName)
    46  	if maxSeats == 0 || maxSeats > e.config.MaximumSeatsLimit {
    47  		maxSeats = e.config.MaximumSeatsLimit
    48  	}
    49  
    50  	// TODO(wojtekt): Remove once we tune the algorithm to not fail
    51  	// scalability tests.
    52  	if !e.config.Enabled {
    53  		return WorkEstimate{
    54  			InitialSeats: minSeats,
    55  		}
    56  	}
    57  
    58  	requestInfo, ok := apirequest.RequestInfoFrom(r.Context())
    59  	if !ok {
    60  		// no RequestInfo should never happen, but to be on the safe side
    61  		// let's return a large value.
    62  		return WorkEstimate{
    63  			InitialSeats:      minSeats,
    64  			FinalSeats:        maxSeats,
    65  			AdditionalLatency: e.config.eventAdditionalDuration(),
    66  		}
    67  	}
    68  
    69  	if isRequestExemptFromWatchEvents(requestInfo) {
    70  		return WorkEstimate{
    71  			InitialSeats:      minSeats,
    72  			FinalSeats:        0,
    73  			AdditionalLatency: time.Duration(0),
    74  		}
    75  	}
    76  
    77  	watchCount := e.countFn(requestInfo)
    78  	metrics.ObserveWatchCount(r.Context(), priorityLevelName, flowSchemaName, watchCount)
    79  
    80  	// The cost of the request associated with the watchers of that event
    81  	// consists of three parts:
    82  	// - cost of going through the event change logic
    83  	// - cost of serialization of the event
    84  	// - cost of processing an event object for each watcher (e.g. filtering,
    85  	//     sending data over network)
    86  	// We're starting simple to get some operational experience with it and
    87  	// we will work on tuning the algorithm later. Given that the actual work
    88  	// associated with processing watch events is happening in multiple
    89  	// goroutines (proportional to the number of watchers) that are all
    90  	// resumed at once, as a starting point we assume that each such goroutine
    91  	// is taking 1/Nth of a seat for M milliseconds.
    92  	// We allow the accounting of that work in P&F to be reshaped into another
    93  	// rectangle of equal area for practical reasons.
    94  	var finalSeats uint64
    95  	var additionalLatency time.Duration
    96  
    97  	// TODO: Make this unconditional after we tune the algorithm better.
    98  	//   Technically, there is an overhead connected to processing an event after
    99  	//   the request finishes even if there is a small number of watches.
   100  	//   However, until we tune the estimation we want to stay on the safe side
   101  	//   an avoid introducing additional latency for almost every single request.
   102  	if watchCount >= int(e.config.WatchesPerSeat) {
   103  		// TODO: As described in the KEP, we should take into account that not all
   104  		//   events are equal and try to estimate the cost of a single event based on
   105  		//   some historical data about size of events.
   106  		finalSeats = uint64(math.Ceil(float64(watchCount) / e.config.WatchesPerSeat))
   107  		finalWork := SeatsTimesDuration(float64(finalSeats), e.config.eventAdditionalDuration())
   108  
   109  		// While processing individual events is highly parallel,
   110  		// the design/implementation of P&F has a couple limitations that
   111  		// make using this assumption in the P&F implementation very
   112  		// inefficient because:
   113  		// - we reserve max(initialSeats, finalSeats) for time of executing
   114  		//   both phases of the request
   115  		// - even more importantly, when a given `wide` request is the one to
   116  		//   be dispatched, we are not dispatching any other request until
   117  		//   we accumulate enough seats to dispatch the nominated one, even
   118  		//   if currently unoccupied seats would allow for dispatching some
   119  		//   other requests in the meantime
   120  		// As a consequence of these, the wider the request, the more capacity
   121  		// will effectively be blocked and unused during dispatching and
   122  		// executing this request.
   123  		//
   124  		// To mitigate the impact of it, we're capping the maximum number of
   125  		// seats that can be assigned to a given request. Thanks to it:
   126  		// 1) we reduce the amount of seat-seconds that are "wasted" during
   127  		//    dispatching and executing initial phase of the request
   128  		// 2) we are not changing the finalWork estimate - just potentially
   129  		//    reshaping it to be narrower and longer. As long as the maximum
   130  		//    seats setting will prevent dispatching too many requests at once
   131  		//    to prevent overloading kube-apiserver (and/or etcd or the VM or
   132  		//    a physical machine it is running on), we believe the relaxed
   133  		//    version should be good enough to achieve the P&F goals.
   134  		//
   135  		// TODO: Confirm that the current cap of maximumSeats allow us to
   136  		//   achieve the above.
   137  		if finalSeats > maxSeats {
   138  			finalSeats = maxSeats
   139  		}
   140  		additionalLatency = finalWork.DurationPerSeat(float64(finalSeats))
   141  	}
   142  
   143  	return WorkEstimate{
   144  		InitialSeats:      1,
   145  		FinalSeats:        finalSeats,
   146  		AdditionalLatency: additionalLatency,
   147  	}
   148  }
   149  
   150  func isRequestExemptFromWatchEvents(requestInfo *apirequest.RequestInfo) bool {
   151  	// Creating token for service account does not produce any event,
   152  	// but still serviceaccounts can have multiple watchers.
   153  	if requestInfo.Resource == "serviceaccounts" && requestInfo.Subresource == "token" {
   154  		return true
   155  	}
   156  	return false
   157  }