k8s.io/apiserver@v0.31.1/pkg/util/flowcontrol/fairqueuing/queueset/queueset.go

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package queueset
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"math"
    24  	"sync"
    25  	"time"
    26  
    27  	"k8s.io/apimachinery/pkg/util/sets"
    28  	"k8s.io/apiserver/pkg/util/flowcontrol/debug"
    29  	fq "k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing"
    30  	"k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/eventclock"
    31  	"k8s.io/apiserver/pkg/util/flowcontrol/fairqueuing/promise"
    32  	"k8s.io/apiserver/pkg/util/flowcontrol/metrics"
    33  	fqrequest "k8s.io/apiserver/pkg/util/flowcontrol/request"
    34  	"k8s.io/apiserver/pkg/util/shufflesharding"
    35  	"k8s.io/klog/v2"
    36  )
    37  
    38  const nsTimeFmt = "2006-01-02 15:04:05.000000000"
    39  
    40  // queueSetFactory implements the QueueSetFactory interface;
    41  // it makes QueueSet objects.
    42  type queueSetFactory struct {
    43  	clock                 eventclock.Interface
    44  	promiseFactoryFactory promiseFactoryFactory
    45  }
    46  
    47  // promiseFactory returns a WriteOnce
    48  // - whose Set method is invoked with the queueSet locked, and
    49  // - whose Get method is invoked with the queueSet not locked.
    50  // The parameters are the same as for `promise.NewWriteOnce`.
    51  type promiseFactory func(initial interface{}, doneCtx context.Context, doneVal interface{}) promise.WriteOnce
    52  
    53  // promiseFactoryFactory returns the promiseFactory to use for the given queueSet
    54  type promiseFactoryFactory func(*queueSet) promiseFactory
    55  
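        // An illustrative (hypothetical) alternative promiseFactoryFactory, of the
        // sort that test code might hand to newTestableQueueSetFactory below; it
        // delegates to promise.NewWriteOnce while counting how many promises are made:
        //
        //	func countingPromiseFactoryFactory(counter *int) promiseFactoryFactory {
        //		return func(qs *queueSet) promiseFactory {
        //			return func(initial interface{}, doneCtx context.Context, doneVal interface{}) promise.WriteOnce {
        //				*counter++
        //				return promise.NewWriteOnce(initial, doneCtx, doneVal)
        //			}
        //		}
        //	}
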
    56  // `*queueSetCompleter` implements QueueSetCompleter.  Exactly one of
    57  // the fields `factory` and `theSet` is non-nil.
    58  type queueSetCompleter struct {
    59  	factory              *queueSetFactory
    60  	reqsGaugePair        metrics.RatioedGaugePair
    61  	execSeatsGauge       metrics.RatioedGauge
    62  	seatDemandIntegrator metrics.Gauge
    63  	theSet               *queueSet
    64  	qCfg                 fq.QueuingConfig
    65  	dealer               *shufflesharding.Dealer
    66  }
    67  
    68  // queueSet implements the Fair Queuing for Server Requests technique
    69  // described in this package's doc, and a pointer to one implements
    70  // the QueueSet interface.  The fields listed before the lock
    71  // should not be changed; the fields listed after the
    72  // lock must be accessed only while holding the lock.
    73  //
    74  // The methods of this type follow the naming convention that the
    75  // suffix "Locked" means the caller must hold the lock; a method
    76  // whose name does not end in "Locked" either acquires the lock
    77  // itself or does not care about locking.
    78  //
    79  // The methods of this type also follow the convention that the suffix
    80  // "ToBoundLocked" means that the caller may have to follow up with a
    81  // call to `boundNextDispatchLocked`.  This is so for a method that
    82  // changes what request is oldest in a queue, because that change means
    83  // that the anti-windup hack in boundNextDispatchLocked needs to be
    84  // applied with respect to the revised oldest request in the queue.
    85  type queueSet struct {
    86  	clock                    eventclock.Interface
    87  	estimatedServiceDuration time.Duration
    88  
    89  	reqsGaugePair metrics.RatioedGaugePair // .RequestsExecuting covers regular phase only
    90  
    91  	execSeatsGauge metrics.RatioedGauge // for all phases of execution
    92  
    93  	seatDemandIntegrator metrics.Gauge
    94  
    95  	promiseFactory promiseFactory
    96  
    97  	lock sync.Mutex
    98  
    99  	// qCfg holds the current queuing configuration.  Its
   100  	// DesiredNumQueues may be less than the current number of queues.
   101  	// If its DesiredNumQueues is zero then its other queuing
   102  	// parameters retain the settings they had when DesiredNumQueues
   103  	// was last non-zero (if ever).
   104  	qCfg fq.QueuingConfig
   105  
   106  	// the current dispatching configuration.
   107  	dCfg fq.DispatchingConfig
   108  
   109  	// If `qCfg.DesiredNumQueues` is positive then dealer is not nil
   110  	// and is good for `qCfg`.
   111  	dealer *shufflesharding.Dealer
   112  
   113  	// queues may be longer than the desired number, while the excess
   114  	// queues are still draining.
   115  	queues []*queue
   116  
   117  	// currentR is the amount of seat-seconds allocated per queue since process startup.
   118  	// This is our generalization of the progress meter named R in the original fair queuing work.
   119  	currentR fqrequest.SeatSeconds
   120  
   121  	// lastRealTime is what `clock.Now()` yielded when `currentR` was last updated
   122  	lastRealTime time.Time
   123  
   124  	// robinIndex is the index of the last queue dispatched
   125  	robinIndex int
   126  
   127  	// totRequestsWaiting is the sum, over all the queues, of the
   128  	// number of requests waiting in that queue
   129  	totRequestsWaiting int
   130  
   131  	// totRequestsExecuting is the total number of requests of this
   132  	// queueSet that are currently executing.  That is the same as the
   133  	// sum, over all the queues, of the number of requests executing
   134  	// from that queue.
   135  	totRequestsExecuting int
   136  
   137  	// requestsExecutingSet is the set of requests executing in the real world IF
   138  	// there are no queues; otherwise the requests are tracked in the queues.
   139  	requestsExecutingSet sets.Set[*request]
   140  
   141  	// totSeatsInUse is the number of total "seats" in use by all the
   142  	// request(s) that are currently executing in this queueset.
   143  	totSeatsInUse int
   144  
   145  	// totSeatsWaiting is the sum, over all the waiting requests, of their
   146  	// max width.
   147  	totSeatsWaiting int
   148  
   149  	// enqueues is the number of requests that have ever been enqueued
   150  	enqueues int
   151  
   152  	// totRequestsDispatched is the total number of requests of this
   153  	// queueSet that have been processed.
   154  	totRequestsDispatched int
   155  
   156  	// totRequestsRejected is the total number of requests of this
   157  	// queueSet that have been rejected.
   158  	totRequestsRejected int
   159  
   160  	// totRequestsTimedout is the total number of requests of this
   161  	// queueSet that have timed out.
   162  	totRequestsTimedout int
   163  
   164  	// totRequestsCancelled is the total number of requests of this
   165  	// queueSet that have been cancelled.
   166  	totRequestsCancelled int
   167  }
   168  
   169  // NewQueueSetFactory creates a new QueueSetFactory object
   170  func NewQueueSetFactory(c eventclock.Interface) fq.QueueSetFactory {
   171  	return newTestableQueueSetFactory(c, ordinaryPromiseFactoryFactory)
   172  }
   173  
   174  // newTestableQueueSetFactory creates a new QueueSetFactory object with the given promiseFactoryFactory
   175  func newTestableQueueSetFactory(c eventclock.Interface, promiseFactoryFactory promiseFactoryFactory) fq.QueueSetFactory {
   176  	return &queueSetFactory{
   177  		clock:                 c,
   178  		promiseFactoryFactory: promiseFactoryFactory,
   179  	}
   180  }
   181  
   182  func (qsf *queueSetFactory) BeginConstruction(qCfg fq.QueuingConfig, reqsGaugePair metrics.RatioedGaugePair, execSeatsGauge metrics.RatioedGauge, seatDemandIntegrator metrics.Gauge) (fq.QueueSetCompleter, error) {
   183  	dealer, err := checkConfig(qCfg)
   184  	if err != nil {
   185  		return nil, err
   186  	}
   187  	return &queueSetCompleter{
   188  		factory:              qsf,
   189  		reqsGaugePair:        reqsGaugePair,
   190  		execSeatsGauge:       execSeatsGauge,
   191  		seatDemandIntegrator: seatDemandIntegrator,
   192  		qCfg:                 qCfg,
   193  		dealer:               dealer}, nil
   194  }
   195  
   196  // checkConfig returns a non-nil Dealer if the config is valid and
   197  // calls for one, and returns a non-nil error if the given config is
   198  // invalid.
   199  func checkConfig(qCfg fq.QueuingConfig) (*shufflesharding.Dealer, error) {
   200  	if qCfg.DesiredNumQueues <= 0 {
   201  		return nil, nil
   202  	}
   203  	dealer, err := shufflesharding.NewDealer(qCfg.DesiredNumQueues, qCfg.HandSize)
   204  	if err != nil {
   205  		err = fmt.Errorf("the QueueSetConfig implies an invalid shuffle sharding config (DesiredNumQueues is deckSize): %w", err)
   206  	}
   207  	return dealer, err
   208  }
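
        // For illustration, a plausible (made-up) queuing config and the dealer it
        // implies; DesiredNumQueues plays the role of shuffle sharding's deck size
        // and HandSize the hand size:
        //
        //	dealer, err := checkConfig(fq.QueuingConfig{
        //		Name:             "example", // hypothetical name
        //		DesiredNumQueues: 64,
        //		HandSize:         6,
        //		QueueLengthLimit: 50,
        //	})
        //	// err is non-nil exactly when shufflesharding.NewDealer(64, 6) rejects the pair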
   209  
   210  func (qsc *queueSetCompleter) Complete(dCfg fq.DispatchingConfig) fq.QueueSet {
   211  	qs := qsc.theSet
   212  	if qs == nil {
   213  		qs = &queueSet{
   214  			clock:                    qsc.factory.clock,
   215  			estimatedServiceDuration: 3 * time.Millisecond,
   216  			reqsGaugePair:            qsc.reqsGaugePair,
   217  			execSeatsGauge:           qsc.execSeatsGauge,
   218  			seatDemandIntegrator:     qsc.seatDemandIntegrator,
   219  			qCfg:                     qsc.qCfg,
   220  			currentR:                 0,
   221  			lastRealTime:             qsc.factory.clock.Now(),
   222  			requestsExecutingSet:     sets.New[*request](),
   223  		}
   224  		qs.promiseFactory = qsc.factory.promiseFactoryFactory(qs)
   225  	}
   226  	qs.setConfiguration(context.Background(), qsc.qCfg, qsc.dealer, dCfg)
   227  	return qs
   228  }
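
        // An end-to-end construction sketch (hedged: qCfg and the gauge arguments
        // are stand-ins for values the caller already has; the dispatching numbers
        // are made up):
        //
        //	qsf := NewQueueSetFactory(eventclock.Real{})
        //	completer, err := qsf.BeginConstruction(qCfg, reqsGaugePair, execSeatsGauge, seatDemandIntegrator)
        //	if err != nil {
        //		panic(err) // illustrative only
        //	}
        //	qs := completer.Complete(fq.DispatchingConfig{ConcurrencyLimit: 10, ConcurrencyDenominator: 10})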
   229  
   230  // createQueues is a helper function for initializing a slice of n queues
   231  func createQueues(n, baseIndex int) []*queue {
   232  	fqqueues := make([]*queue, n)
   233  	for i := 0; i < n; i++ {
   234  		fqqueues[i] = &queue{index: baseIndex + i, requestsWaiting: newRequestFIFO(), requestsExecuting: sets.New[*request]()}
   235  	}
   236  	return fqqueues
   237  }
   238  
   239  func (qs *queueSet) BeginConfigChange(qCfg fq.QueuingConfig) (fq.QueueSetCompleter, error) {
   240  	dealer, err := checkConfig(qCfg)
   241  	if err != nil {
   242  		return nil, err
   243  	}
   244  	return &queueSetCompleter{
   245  		theSet: qs,
   246  		qCfg:   qCfg,
   247  		dealer: dealer}, nil
   248  }
   249  
   250  // setConfiguration is used to set the configuration for a queueSet.
   251  // It also handles updates to existing fields;
   252  // e.g., if DesiredNumQueues is increased, setConfiguration reconciles by
   253  // adding more queues.
   254  func (qs *queueSet) setConfiguration(ctx context.Context, qCfg fq.QueuingConfig, dealer *shufflesharding.Dealer, dCfg fq.DispatchingConfig) {
   255  	qs.lockAndSyncTime(ctx)
   256  	defer qs.lock.Unlock()
   257  
   258  	if qCfg.DesiredNumQueues > 0 {
   259  		// Adding queues is the only thing that requires immediate action
   260  		// Removing queues is handled by attrition, removing a queue when
   261  		// it goes empty and there are too many.
   262  		numQueues := len(qs.queues)
   263  		if qCfg.DesiredNumQueues > numQueues {
   264  			qs.queues = append(qs.queues,
   265  				createQueues(qCfg.DesiredNumQueues-numQueues, len(qs.queues))...)
   266  		}
   267  	} else {
   268  		qCfg.QueueLengthLimit = qs.qCfg.QueueLengthLimit
   269  		qCfg.HandSize = qs.qCfg.HandSize
   270  	}
   271  
   272  	qs.qCfg = qCfg
   273  	qs.dCfg = dCfg
   274  	qs.dealer = dealer
   275  	qll := qCfg.QueueLengthLimit
   276  	if qll < 1 {
   277  		qll = 1
   278  	}
   279  	if qCfg.DesiredNumQueues > 0 {
   280  		qll *= qCfg.DesiredNumQueues
   281  	}
   282  	qs.reqsGaugePair.RequestsWaiting.SetDenominator(float64(qll))
   283  	qs.reqsGaugePair.RequestsExecuting.SetDenominator(float64(dCfg.ConcurrencyDenominator))
   284  	qs.execSeatsGauge.SetDenominator(float64(dCfg.ConcurrencyDenominator))
   285  
   286  	qs.dispatchAsMuchAsPossibleLocked()
   287  }
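
        // To make the denominator arithmetic above concrete (made-up numbers): with
        // QueueLengthLimit=50 and DesiredNumQueues=64, the RequestsWaiting gauge is
        // scaled against qll = 50*64 = 3200 potential waiting requests; with
        // DesiredNumQueues=0 (no queuing) it is scaled against max(QueueLengthLimit, 1).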
   288  
   289  // A decision about a request
   290  type requestDecision int
   291  
   292  // Values passed through a request's decision
   293  const (
   294  	// Serve this one
   295  	decisionExecute requestDecision = iota
   296  
   297  	// This one's context timed out / was canceled
   298  	decisionCancel
   299  )
   300  
   301  // StartRequest begins the process of handling a request.  We take the
   302  // approach of updating the metrics about total requests queued and
   303  // executing at each point where there is a change in that quantity,
   304  // because the metrics --- and only the metrics --- track that
   305  // quantity per FlowSchema.
   306  // The queueSet's promiseFactory is invoked once if the returned Request is non-nil,
   307  // not invoked if the Request is nil.
   308  func (qs *queueSet) StartRequest(ctx context.Context, workEstimate *fqrequest.WorkEstimate, hashValue uint64, flowDistinguisher, fsName string, descr1, descr2 interface{}, queueNoteFn fq.QueueNoteFn) (fq.Request, bool) {
   309  	qs.lockAndSyncTime(ctx)
   310  	defer qs.lock.Unlock()
   311  	var req *request
   312  
   313  	// ========================================================================
   314  	// Step 0:
   315  	// Apply only concurrency limit, if zero queues desired
   316  	if qs.qCfg.DesiredNumQueues < 1 {
   317  		if !qs.canAccommodateSeatsLocked(workEstimate.MaxSeats()) {
   318  			klog.V(5).Infof("QS(%s): rejecting request %q %#+v %#+v because %d seats are asked for, %d seats are in use (%d are executing) and the limit is %d",
   319  				qs.qCfg.Name, fsName, descr1, descr2, workEstimate.MaxSeats(), qs.totSeatsInUse, qs.totRequestsExecuting, qs.dCfg.ConcurrencyLimit)
   320  			qs.totRequestsRejected++
   321  			metrics.AddReject(ctx, qs.qCfg.Name, fsName, "concurrency-limit")
   322  			return nil, qs.isIdleLocked()
   323  		}
   324  		req = qs.dispatchSansQueueLocked(ctx, workEstimate, flowDistinguisher, fsName, descr1, descr2)
   325  		return req, false
   326  	}
   327  
   328  	// ========================================================================
   329  	// Step 1:
   330  	// 1) Start with shuffle sharding, to pick a queue.
   331  	// 2) Reject current request if there is not enough concurrency shares and
   332  	// we are at max queue length
   333  	// 3) If not rejected, create a request and enqueue
   334  	req = qs.shuffleShardAndRejectOrEnqueueLocked(ctx, workEstimate, hashValue, flowDistinguisher, fsName, descr1, descr2, queueNoteFn)
   335  	// req == nil means that the request was rejected - no remaining
   336  	// concurrency shares and at max queue length already
   337  	if req == nil {
   338  		klog.V(5).Infof("QS(%s): rejecting request %q %#+v %#+v due to queue full", qs.qCfg.Name, fsName, descr1, descr2)
   339  		qs.totRequestsRejected++
   340  		metrics.AddReject(ctx, qs.qCfg.Name, fsName, "queue-full")
   341  		return nil, qs.isIdleLocked()
   342  	}
   343  
   344  	// ========================================================================
   345  	// Step 2:
   346  	// The next step is to invoke the method that dequeues as much
   347  	// as possible.
   348  	// This method runs a loop, as long as there are non-empty
   349  	// queues and the number currently executing is less than the
   350  	// assured concurrency value.  The body of the loop uses the
   351  	// fair queuing technique to pick a queue and dispatch a
   352  	// request from that queue.
   353  	qs.dispatchAsMuchAsPossibleLocked()
   354  
   355  	return req, false
   356  }
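
        // A caller-side sketch of the request lifecycle (hedged: ctx, est, flowHash,
        // and the flow metadata are assumed to be in scope; est is a
        // fqrequest.WorkEstimate): StartRequest pairs with Request.Finish, which
        // runs the given function only if the request is dispatched:
        //
        //	req, idle := qs.StartRequest(ctx, &est, flowHash, flowDistinguisher,
        //		fsName, descr1, descr2, func(inQueue bool) {})
        //	if req == nil {
        //		return idle // rejected; idle reports whether the QueueSet went idle
        //	}
        //	idle = req.Finish(func() {
        //		// serve the request here
        //	})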
   357  
   358  // ordinaryPromiseFactoryFactory is the promiseFactoryFactory that
   359  // a queueSetFactory would ordinarily use.
   360  // Test code might use something different.
   361  func ordinaryPromiseFactoryFactory(qs *queueSet) promiseFactory {
   362  	return promise.NewWriteOnce
   363  }
   364  
   365  // MaxSeats returns the maximum number of seats this request requires; it is
   366  // the maximum of WorkEstimate.InitialSeats and WorkEstimate.FinalSeats.
   367  func (req *request) MaxSeats() int {
   368  	return req.workEstimate.MaxSeats()
   369  }
   370  
   371  func (req *request) InitialSeats() int {
   372  	return int(req.workEstimate.InitialSeats)
   373  }
   374  
   375  func (req *request) NoteQueued(inQueue bool) {
   376  	if req.queueNoteFn != nil {
   377  		req.queueNoteFn(inQueue)
   378  	}
   379  }
   380  
   381  func (req *request) Finish(execFn func()) bool {
   382  	exec, idle := req.wait()
   383  	if !exec {
   384  		return idle
   385  	}
   386  	func() {
   387  		defer func() {
   388  			idle = req.qs.finishRequestAndDispatchAsMuchAsPossible(req)
   389  		}()
   390  
   391  		execFn()
   392  	}()
   393  
   394  	return idle
   395  }
   396  
   397  func (req *request) wait() (bool, bool) {
   398  	qs := req.qs
   399  
   400  	// ========================================================================
   401  	// Step 3:
   402  	// The final step is to wait on a decision from
   403  	// somewhere and then act on it.
   404  	decisionAny := req.decision.Get()
   405  	qs.lockAndSyncTime(req.ctx)
   406  	defer qs.lock.Unlock()
   407  	if req.waitStarted {
   408  		// This cannot happen, because the client is forbidden to
   409  		// call Wait twice on the same request
   410  		klog.Errorf("Duplicate call to the Wait method!  Immediately returning execute=false.  QueueSet=%s, startTime=%s, descr1=%#+v, descr2=%#+v", req.qs.qCfg.Name, req.startTime, req.descr1, req.descr2)
   411  		return false, qs.isIdleLocked()
   412  	}
   413  	req.waitStarted = true
   414  	switch decisionAny {
   415  	case decisionCancel: // handle in code following this switch
   416  	case decisionExecute:
   417  		klog.V(5).Infof("QS(%s): Dispatching request %#+v %#+v from its queue", qs.qCfg.Name, req.descr1, req.descr2)
   418  		return true, false
   419  	default:
   420  		// This cannot happen; all possible values are handled above
   421  		klog.Errorf("QS(%s): Impossible decision (type %T, value %#+v) for request %#+v %#+v!  Treating as cancel", qs.qCfg.Name, decisionAny, decisionAny, req.descr1, req.descr2)
   422  	}
   423  	// TODO(aaron-prindle) add metrics for this case
   424  	klog.V(5).Infof("QS(%s): Ejecting request %#+v %#+v from its queue", qs.qCfg.Name, req.descr1, req.descr2)
   425  	// remove the request from the queue, since its wait in the queue has ended (timed out or was canceled)
   426  	queue := req.queue
   427  	if req.removeFromQueueLocked() != nil {
   428  		defer qs.boundNextDispatchLocked(queue)
   429  		qs.totRequestsWaiting--
   430  		qs.totSeatsWaiting -= req.MaxSeats()
   431  		qs.totRequestsRejected++
   432  		qs.totRequestsCancelled++
   433  		metrics.AddReject(req.ctx, qs.qCfg.Name, req.fsName, "time-out")
   434  		metrics.AddRequestsInQueues(req.ctx, qs.qCfg.Name, req.fsName, -1)
   435  		metrics.AddSeatsInQueues(req.ctx, qs.qCfg.Name, req.fsName, -req.MaxSeats())
   436  		req.NoteQueued(false)
   437  		qs.reqsGaugePair.RequestsWaiting.Add(-1)
   438  		qs.seatDemandIntegrator.Set(float64(qs.totSeatsInUse + qs.totSeatsWaiting))
   439  	}
   440  	return false, qs.isIdleLocked()
   441  }
   442  
   443  func (qs *queueSet) IsIdle() bool {
   444  	qs.lock.Lock()
   445  	defer qs.lock.Unlock()
   446  	return qs.isIdleLocked()
   447  }
   448  
   449  func (qs *queueSet) isIdleLocked() bool {
   450  	return qs.totRequestsWaiting == 0 && qs.totRequestsExecuting == 0
   451  }
   452  
   453  // lockAndSyncTime acquires the lock and updates the virtual time.
   454  // Doing them together avoids the mistake of modifying some queue state
   455  // before calling syncTimeLocked.
   456  func (qs *queueSet) lockAndSyncTime(ctx context.Context) {
   457  	qs.lock.Lock()
   458  	qs.syncTimeLocked(ctx)
   459  }
   460  
   461  // syncTimeLocked updates the virtual time based on the assumption
   462  // that the current state of the queues has been in effect since
   463  // `qs.lastRealTime`.  Thus, it should be invoked after acquiring the
   464  // lock and before modifying the state of any queue.
   465  func (qs *queueSet) syncTimeLocked(ctx context.Context) {
   466  	realNow := qs.clock.Now()
   467  	timeSinceLast := realNow.Sub(qs.lastRealTime)
   468  	qs.lastRealTime = realNow
   469  	prevR := qs.currentR
   470  	incrR := fqrequest.SeatsTimesDuration(qs.getVirtualTimeRatioLocked(), timeSinceLast)
   471  	qs.currentR = prevR + incrR
   472  	switch {
   473  	case prevR > qs.currentR:
   474  		klog.ErrorS(errors.New("queueset::currentR overflow"), "Overflow", "QS", qs.qCfg.Name, "when", realNow.Format(nsTimeFmt), "prevR", prevR, "incrR", incrR, "currentR", qs.currentR)
   475  	case qs.currentR >= highR:
   476  		qs.advanceEpoch(ctx, realNow, incrR)
   477  	}
   478  	metrics.SetCurrentR(qs.qCfg.Name, qs.currentR.ToFloat())
   479  }
   480  
   481  // rDecrement is the amount by which the progress meter R is wound backwards
   482  // when needed to avoid overflow.
   483  const rDecrement = fqrequest.MaxSeatSeconds / 2
   484  
   485  // highR is the threshold that triggers advance of the epoch.
   486  // That is, decrementing the global progress meter R by rDecrement.
   487  const highR = rDecrement + rDecrement/2
   488  
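        // Numerically: rDecrement = MaxSeatSeconds/2, so highR = (3/4)*MaxSeatSeconds.
        // Once currentR crosses highR, advanceEpoch winds it back by rDecrement,
        // to roughly (1/4)*MaxSeatSeconds, keeping R far from both ends of its range.
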
   489  // advanceEpoch subtracts rDecrement from the global progress meter R
   490  // and all the readings that have been taken from that meter.
   491  // The now and incrR parameters are only used to add info to the log messages.
   492  func (qs *queueSet) advanceEpoch(ctx context.Context, now time.Time, incrR fqrequest.SeatSeconds) {
   493  	oldR := qs.currentR
   494  	qs.currentR -= rDecrement
   495  	klog.InfoS("Advancing epoch", "QS", qs.qCfg.Name, "when", now.Format(nsTimeFmt), "oldR", oldR, "newR", qs.currentR, "incrR", incrR)
   496  	success := true
   497  	for qIdx, queue := range qs.queues {
   498  		if queue.requestsWaiting.Length() == 0 && queue.requestsExecuting.Len() == 0 {
   499  			// Do not just decrement, the value could be quite outdated.
   500  			// It is safe to reset to zero in this case, because the next request
   501  			// will overwrite the zero with `qs.currentR`.
   502  			queue.nextDispatchR = 0
   503  			continue
   504  		}
   505  		oldNextDispatchR := queue.nextDispatchR
   506  		queue.nextDispatchR -= rDecrement
   507  		if queue.nextDispatchR > oldNextDispatchR {
   508  			klog.ErrorS(errors.New("queue::nextDispatchR underflow"), "Underflow", "QS", qs.qCfg.Name, "queue", qIdx, "oldNextDispatchR", oldNextDispatchR, "newNextDispatchR", queue.nextDispatchR, "incrR", incrR)
   509  			success = false
   510  		}
   511  		queue.requestsWaiting.Walk(func(req *request) bool {
   512  			oldArrivalR := req.arrivalR
   513  			req.arrivalR -= rDecrement
   514  			if req.arrivalR > oldArrivalR {
   515  				klog.ErrorS(errors.New("request::arrivalR underflow"), "Underflow", "QS", qs.qCfg.Name, "queue", qIdx, "request", *req, "oldArrivalR", oldArrivalR, "incrR", incrR)
   516  				success = false
   517  			}
   518  			return true
   519  		})
   520  	}
   521  	metrics.AddEpochAdvance(ctx, qs.qCfg.Name, success)
   522  }
   523  
   524  // getVirtualTimeRatioLocked calculates the rate at which virtual time has
   525  // been advancing, according to the logic in `doc.go`.
   526  func (qs *queueSet) getVirtualTimeRatioLocked() float64 {
   527  	activeQueues := 0
   528  	seatsRequested := 0
   529  	for _, queue := range qs.queues {
   530  		// here we want the sum of the maximum width of the requests in this queue since our
   531  		// goal is to find the maximum rate at which the queue could work.
   532  		seatsRequested += (queue.seatsInUse + queue.requestsWaiting.QueueSum().MaxSeatsSum)
   533  		if queue.requestsWaiting.Length() > 0 || queue.requestsExecuting.Len() > 0 {
   534  			activeQueues++
   535  		}
   536  	}
   537  	if activeQueues == 0 {
   538  		return 0
   539  	}
   540  	return math.Min(float64(seatsRequested), float64(qs.dCfg.ConcurrencyLimit)) / float64(activeQueues)
   541  }
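
        // A worked example with made-up numbers: two active queues whose executing
        // and waiting requests together ask for 30 seats, under ConcurrencyLimit=10,
        // give min(30, 10)/2 = 5; syncTimeLocked then advances R at 5 seat-seconds
        // per real second, the nominal service rate of each active queue.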
   542  
   543  // shuffleShardAndRejectOrEnqueueLocked encapsulates the logic required
   544  // to validate and enqueue a request for the queueSet/QueueSet:
   545  // 1) Start with shuffle sharding, to pick a queue.
   546  // 2) Reject current request if there is not enough concurrency shares and
   547  // we are at max queue length
   548  // 3) If not rejected, create a request and enqueue
   549  // It returns the enqueued request on a successful enqueue, and
   550  // returns nil in the case that there is no available concurrency or
   551  // the queue length limit has been reached.
   552  func (qs *queueSet) shuffleShardAndRejectOrEnqueueLocked(ctx context.Context, workEstimate *fqrequest.WorkEstimate, hashValue uint64, flowDistinguisher, fsName string, descr1, descr2 interface{}, queueNoteFn fq.QueueNoteFn) *request {
   553  	// Start with the shuffle sharding, to pick a queue.
   554  	queueIdx := qs.shuffleShardLocked(hashValue, descr1, descr2)
   555  	queue := qs.queues[queueIdx]
   556  
   557  	defer qs.boundNextDispatchLocked(queue)
   558  
   559  	// Create a request and enqueue
   560  	req := &request{
   561  		qs:                qs,
   562  		fsName:            fsName,
   563  		flowDistinguisher: flowDistinguisher,
   564  		ctx:               ctx,
   565  		decision:          qs.promiseFactory(nil, ctx, decisionCancel),
   566  		arrivalTime:       qs.clock.Now(),
   567  		arrivalR:          qs.currentR,
   568  		queue:             queue,
   569  		descr1:            descr1,
   570  		descr2:            descr2,
   571  		queueNoteFn:       queueNoteFn,
   572  		workEstimate:      qs.completeWorkEstimate(workEstimate),
   573  	}
   574  	if ok := qs.rejectOrEnqueueToBoundLocked(req); !ok {
   575  		return nil
   576  	}
   577  	metrics.ObserveQueueLength(ctx, qs.qCfg.Name, fsName, queue.requestsWaiting.Length())
   578  	return req
   579  }
   580  
   581  // shuffleShardLocked uses shuffle sharding to select a queue index
   582  // using the given hashValue and the shuffle sharding parameters of the queueSet.
   583  func (qs *queueSet) shuffleShardLocked(hashValue uint64, descr1, descr2 interface{}) int {
   584  	var backHand [8]int
   585  	// Deal into a data structure, so that the order of visit below is not necessarily the order of the deal.
   586  	// This removes bias in the case of flows with overlapping hands.
   587  	hand := qs.dealer.DealIntoHand(hashValue, backHand[:])
   588  	handSize := len(hand)
   589  	offset := qs.enqueues % handSize
   590  	qs.enqueues++
   591  	bestQueueIdx := -1
   592  	minQueueSeatSeconds := fqrequest.MaxSeatSeconds
   593  	for i := 0; i < handSize; i++ {
   594  		queueIdx := hand[(offset+i)%handSize]
   595  		queue := qs.queues[queueIdx]
   596  		queueSum := queue.requestsWaiting.QueueSum()
   597  
   598  		// this is the total amount of work in seat-seconds for requests
   599  		// waiting in this queue; we will select the queue with the minimum.
   600  		thisQueueSeatSeconds := queueSum.TotalWorkSum
   601  		klog.V(7).Infof("QS(%s): For request %#+v %#+v considering queue %d with sum: %#v and %d seats in use, nextDispatchR=%v", qs.qCfg.Name, descr1, descr2, queueIdx, queueSum, queue.seatsInUse, queue.nextDispatchR)
   602  		if thisQueueSeatSeconds < minQueueSeatSeconds {
   603  			minQueueSeatSeconds = thisQueueSeatSeconds
   604  			bestQueueIdx = queueIdx
   605  		}
   606  	}
   607  	if klogV := klog.V(6); klogV.Enabled() {
   608  		chosenQueue := qs.queues[bestQueueIdx]
   609  		klogV.Infof("QS(%s) at t=%s R=%v: For request %#+v %#+v chose queue %d, with sum: %#v & %d seats in use & nextDispatchR=%v", qs.qCfg.Name, qs.clock.Now().Format(nsTimeFmt), qs.currentR, descr1, descr2, bestQueueIdx, chosenQueue.requestsWaiting.QueueSum(), chosenQueue.seatsInUse, chosenQueue.nextDispatchR)
   610  	}
   611  	return bestQueueIdx
   612  }
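
        // A minimal standalone sketch of the shuffle-sharding primitive used above
        // (illustrative sizes; in this file the deck and hand sizes come from
        // qCfg.DesiredNumQueues and qCfg.HandSize):
        //
        //	d, _ := shufflesharding.NewDealer(64, 6) // deckSize=64, handSize=6
        //	flowHash := uint64(8675309)              // made-up flow hash
        //	var hand [6]int
        //	for _, queueIdx := range d.DealIntoHand(flowHash, hand[:]) {
        //		_ = queueIdx // one of this flow's 6 candidate queues
        //	}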
   613  
   614  // rejectOrEnqueueToBoundLocked rejects or enqueues the newly arrived
   615  // request, which has been assigned to a queue.  If the queueSet is up
   616  // against both the queue length limit and the concurrency limit, it
   617  // returns false.  Otherwise it enqueues the request and returns true.
   618  func (qs *queueSet) rejectOrEnqueueToBoundLocked(request *request) bool {
   619  	queue := request.queue
   620  	curQueueLength := queue.requestsWaiting.Length()
   621  	// rejects the newly arrived request if resource criteria not met
   622  	if qs.totSeatsInUse >= qs.dCfg.ConcurrencyLimit &&
   623  		curQueueLength >= qs.qCfg.QueueLengthLimit {
   624  		return false
   625  	}
   626  
   627  	qs.enqueueToBoundLocked(request)
   628  	return true
   629  }
   630  
   631  // enqueueToBoundLocked enqueues a request into its queue.
   632  func (qs *queueSet) enqueueToBoundLocked(request *request) {
   633  	queue := request.queue
   634  	now := qs.clock.Now()
   635  	if queue.requestsWaiting.Length() == 0 && queue.requestsExecuting.Len() == 0 {
   636  		// the queue’s start R is set to the virtual time.
   637  		queue.nextDispatchR = qs.currentR
   638  		klogV := klog.V(6)
   639  		if klogV.Enabled() {
   640  			klogV.Infof("QS(%s) at t=%s R=%v: initialized queue %d start R due to request %#+v %#+v", qs.qCfg.Name, now.Format(nsTimeFmt), queue.nextDispatchR, queue.index, request.descr1, request.descr2)
   641  		}
   642  	}
   643  	request.removeFromQueueLocked = queue.requestsWaiting.Enqueue(request)
   644  	qs.totRequestsWaiting++
   645  	qs.totSeatsWaiting += request.MaxSeats()
   646  	metrics.AddRequestsInQueues(request.ctx, qs.qCfg.Name, request.fsName, 1)
   647  	metrics.AddSeatsInQueues(request.ctx, qs.qCfg.Name, request.fsName, request.MaxSeats())
   648  	request.NoteQueued(true)
   649  	qs.reqsGaugePair.RequestsWaiting.Add(1)
   650  	qs.seatDemandIntegrator.Set(float64(qs.totSeatsInUse + qs.totSeatsWaiting))
   651  }
   652  
   653  // dispatchAsMuchAsPossibleLocked does as many dispatches as possible now.
   654  func (qs *queueSet) dispatchAsMuchAsPossibleLocked() {
   655  	for qs.totRequestsWaiting != 0 && qs.totSeatsInUse < qs.dCfg.ConcurrencyLimit && qs.dispatchLocked() {
   656  	}
   657  }
   658  
   659  func (qs *queueSet) dispatchSansQueueLocked(ctx context.Context, workEstimate *fqrequest.WorkEstimate, flowDistinguisher, fsName string, descr1, descr2 interface{}) *request {
   660  	// does not call metrics.SetDispatchMetrics because there is no queuing and thus no interesting virtual world
   661  	now := qs.clock.Now()
   662  	req := &request{
   663  		qs:                qs,
   664  		fsName:            fsName,
   665  		flowDistinguisher: flowDistinguisher,
   666  		ctx:               ctx,
   667  		startTime:         now,
   668  		decision:          qs.promiseFactory(decisionExecute, ctx, decisionCancel),
   669  		arrivalTime:       now,
   670  		arrivalR:          qs.currentR,
   671  		descr1:            descr1,
   672  		descr2:            descr2,
   673  		workEstimate:      qs.completeWorkEstimate(workEstimate),
   674  	}
   675  	qs.totRequestsExecuting++
   676  	qs.totSeatsInUse += req.MaxSeats()
   677  	qs.requestsExecutingSet = qs.requestsExecutingSet.Insert(req)
   678  	metrics.AddRequestsExecuting(ctx, qs.qCfg.Name, fsName, 1)
   679  	metrics.AddSeatConcurrencyInUse(qs.qCfg.Name, fsName, req.MaxSeats())
   680  	qs.reqsGaugePair.RequestsExecuting.Add(1)
   681  	qs.execSeatsGauge.Add(float64(req.MaxSeats()))
   682  	qs.seatDemandIntegrator.Set(float64(qs.totSeatsInUse + qs.totSeatsWaiting))
   683  	klogV := klog.V(5)
   684  	if klogV.Enabled() {
   685  		klogV.Infof("QS(%s) at t=%s R=%v: immediate dispatch of request %q %#+v %#+v, qs will have %d executing", qs.qCfg.Name, now.Format(nsTimeFmt), qs.currentR, fsName, descr1, descr2, qs.totRequestsExecuting)
   686  	}
   687  	return req
   688  }
   689  
   690  // dispatchLocked uses the Fair Queuing for Server Requests method to
   691  // select a queue and dispatch the oldest request in that queue.  The
   692  // return value indicates whether a request was dequeued; this will
   693  // be false when either all queues are empty or the request at the head
   694  // of the next queue cannot be dispatched.
   695  func (qs *queueSet) dispatchLocked() bool {
   696  	queue, request := qs.findDispatchQueueToBoundLocked()
   697  	if queue == nil {
   698  		return false
   699  	}
   700  	if request == nil { // This should never happen.  But if it does...
   701  		return false
   702  	}
   703  	qs.totRequestsWaiting--
   704  	qs.totSeatsWaiting -= request.MaxSeats()
   705  	metrics.AddRequestsInQueues(request.ctx, qs.qCfg.Name, request.fsName, -1)
   706  	metrics.AddSeatsInQueues(request.ctx, qs.qCfg.Name, request.fsName, -request.MaxSeats())
   707  	request.NoteQueued(false)
   708  	qs.reqsGaugePair.RequestsWaiting.Add(-1)
   709  	defer qs.boundNextDispatchLocked(queue)
   710  	if !request.decision.Set(decisionExecute) {
   711  		qs.seatDemandIntegrator.Set(float64(qs.totSeatsInUse + qs.totSeatsWaiting))
   712  		return true
   713  	}
   714  	request.startTime = qs.clock.Now()
   715  	// At this moment the request leaves its queue and starts
   716  	// executing.  We do not recognize any interim state between
   717  	// "queued" and "executing".  While that means "executing"
   718  	// includes a little overhead from this package, this is not a
   719  	// problem because other overhead is also included.
   720  	qs.totRequestsExecuting++
   721  	qs.totSeatsInUse += request.MaxSeats()
   722  	queue.requestsExecuting = queue.requestsExecuting.Insert(request)
   723  	queue.seatsInUse += request.MaxSeats()
   724  	metrics.AddRequestsExecuting(request.ctx, qs.qCfg.Name, request.fsName, 1)
   725  	metrics.AddSeatConcurrencyInUse(qs.qCfg.Name, request.fsName, request.MaxSeats())
   726  	qs.reqsGaugePair.RequestsExecuting.Add(1)
   727  	qs.execSeatsGauge.Add(float64(request.MaxSeats()))
   728  	qs.seatDemandIntegrator.Set(float64(qs.totSeatsInUse + qs.totSeatsWaiting))
   729  	klogV := klog.V(6)
   730  	if klogV.Enabled() {
   731  		klogV.Infof("QS(%s) at t=%s R=%v: dispatching request %#+v %#+v work %v from queue %d with start R %v, queue will have %d waiting & %d requests occupying %d seats, set will have %d seats occupied",
   732  			qs.qCfg.Name, request.startTime.Format(nsTimeFmt), qs.currentR, request.descr1, request.descr2,
   733  			request.workEstimate, queue.index, queue.nextDispatchR, queue.requestsWaiting.Length(), queue.requestsExecuting.Len(), queue.seatsInUse, qs.totSeatsInUse)
   734  	}
   735  	// When a request is dequeued for service, its queue's nextDispatchR advances by the request's total work (the generalization of the old "virtualStart += G * width")
   736  	if request.totalWork() > rDecrement/100 { // A single increment should never be so big
   737  		klog.Errorf("QS(%s) at t=%s R=%v: dispatching request %#+v %#+v with implausibly high work %v from queue %d with start R %v",
   738  			qs.qCfg.Name, request.startTime.Format(nsTimeFmt), qs.currentR, request.descr1, request.descr2,
   739  			request.workEstimate, queue.index, queue.nextDispatchR)
   740  	}
   741  	queue.nextDispatchR += request.totalWork()
   742  	return true
   743  }
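
        // To illustrate the accounting above (made-up numbers): dispatching a request
        // whose totalWork() is 0.006 seat-seconds advances that queue's nextDispatchR
        // by 0.006, pushing the queue later in the virtual-finish ordering that
        // findDispatchQueueToBoundLocked uses to choose the next queue.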
   744  
   745  // canAccommodateSeatsLocked returns true if this queueSet has enough
   746  // seats available to accommodate a request with the given number of seats;
   747  // otherwise it returns false.
   748  func (qs *queueSet) canAccommodateSeatsLocked(seats int) bool {
   749  	switch {
   750  	case qs.qCfg.DesiredNumQueues < 0:
   751  		// This is code for exemption from limitation
   752  		return true
   753  	case seats > qs.dCfg.ConcurrencyLimit:
   754  		// we have picked the queue with the minimum virtual finish time, but
   755  		// the number of seats this request asks for exceeds the concurrency limit.
   756  		// TODO: this is a quick fix for now, once we have borrowing in place we will not need it
   757  		if qs.totRequestsExecuting == 0 {
   758  			// TODO: apply additional latency associated with this request, as described in the KEP
   759  			return true
   760  		}
   761  		// wait for all "currently" executing requests in this queueSet
   762  		// to finish before we can execute this request.
   763  		return false
   764  	case qs.totSeatsInUse+seats > qs.dCfg.ConcurrencyLimit:
   765  		return false
   766  	}
   767  
   768  	return true
   769  }
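
        // For example (made-up numbers): with ConcurrencyLimit=10 and totSeatsInUse=8,
        // a request for 2 seats fits (8+2 <= 10) while one for 3 seats does not; a
        // request for 12 seats exceeds the limit outright and is admitted only once
        // totRequestsExecuting has dropped to zero.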
   770  
   771  // findDispatchQueueToBoundLocked examines the queues in round robin order and
   772  // returns the first one of those for which the virtual finish time of
   773  // the oldest waiting request is minimal, and also returns that request.
   774  // Returns nils if the head of the selected queue cannot be dispatched now,
   775  // in which case the caller does not need to follow up with `qs.boundNextDispatchLocked`.
   776  func (qs *queueSet) findDispatchQueueToBoundLocked() (*queue, *request) {
   777  	minVirtualFinish := fqrequest.MaxSeatSeconds
   778  	sMin := fqrequest.MaxSeatSeconds
   779  	dsMin := fqrequest.MaxSeatSeconds
   780  	sMax := fqrequest.MinSeatSeconds
   781  	dsMax := fqrequest.MinSeatSeconds
   782  	var minQueue *queue
   783  	var minIndex int
   784  	nq := len(qs.queues)
   785  	for range qs.queues {
   786  		qs.robinIndex = (qs.robinIndex + 1) % nq
   787  		queue := qs.queues[qs.robinIndex]
   788  		oldestWaiting, _ := queue.requestsWaiting.Peek()
   789  		if oldestWaiting != nil {
   790  			sMin = min(sMin, queue.nextDispatchR)
   791  			sMax = max(sMax, queue.nextDispatchR)
   792  			estimatedWorkInProgress := fqrequest.SeatsTimesDuration(float64(queue.seatsInUse), qs.estimatedServiceDuration)
   793  			dsMin = min(dsMin, queue.nextDispatchR-estimatedWorkInProgress)
   794  			dsMax = max(dsMax, queue.nextDispatchR-estimatedWorkInProgress)
   795  			currentVirtualFinish := queue.nextDispatchR + oldestWaiting.totalWork()
   796  			klog.V(11).InfoS("Considering queue to dispatch", "queueSet", qs.qCfg.Name, "queue", qs.robinIndex, "finishR", currentVirtualFinish)
   797  			if currentVirtualFinish < minVirtualFinish {
   798  				minVirtualFinish = currentVirtualFinish
   799  				minQueue = queue
   800  				minIndex = qs.robinIndex
   801  			}
   802  		}
   803  	}
   804  
   805  	oldestReqFromMinQueue, _ := minQueue.requestsWaiting.Peek()
   806  	if oldestReqFromMinQueue == nil {
   807  		// This cannot happen
   808  		klog.ErrorS(errors.New("selected queue is empty"), "Impossible", "queueSet", qs.qCfg.Name)
   809  		return nil, nil
   810  	}
   811  	if !qs.canAccommodateSeatsLocked(oldestReqFromMinQueue.MaxSeats()) {
   812  		// since we have not picked the queue with the minimum virtual finish
   813  		// time, we are not going to advance the round robin index here.
   814  		klogV := klog.V(4)
   815  		if klogV.Enabled() {
   816  			klogV.Infof("QS(%s): request %v %v seats %d cannot be dispatched from queue %d, waiting for currently executing requests to complete, %d requests are occupying %d seats and the limit is %d",
   817  				qs.qCfg.Name, oldestReqFromMinQueue.descr1, oldestReqFromMinQueue.descr2, oldestReqFromMinQueue.MaxSeats(), minQueue.index, qs.totRequestsExecuting, qs.totSeatsInUse, qs.dCfg.ConcurrencyLimit)
   818  		}
   819  		metrics.AddDispatchWithNoAccommodation(qs.qCfg.Name, oldestReqFromMinQueue.fsName)
   820  		return nil, nil
   821  	}
   822  	oldestReqFromMinQueue.removeFromQueueLocked()
   823  
   824  	// If the requested final seats exceed the concurrency limit of this queueSet,
   825  	// we reduce them to current capacity and adjust additional latency
   826  	// to preserve the total amount of work.
   827  	if oldestReqFromMinQueue.workEstimate.FinalSeats > uint64(qs.dCfg.ConcurrencyLimit) {
   828  		finalSeats := uint64(qs.dCfg.ConcurrencyLimit)
   829  		additionalLatency := oldestReqFromMinQueue.workEstimate.finalWork.DurationPerSeat(float64(finalSeats))
   830  		oldestReqFromMinQueue.workEstimate.FinalSeats = finalSeats
   831  		oldestReqFromMinQueue.workEstimate.AdditionalLatency = additionalLatency
   832  	}
   833  
   834  	// we set the round robin indexing to start at the chosen queue
   835  	// for the next round.  This way the non-selected queues
   836  	// win in the case that the virtual finish times are the same.
   837  	qs.robinIndex = minIndex
   838  
   839  	if minQueue.nextDispatchR < oldestReqFromMinQueue.arrivalR {
   840  		klog.ErrorS(errors.New("dispatch before arrival"), "Inconceivable!", "QS", qs.qCfg.Name, "queue", minQueue.index, "dispatchR", minQueue.nextDispatchR, "request", oldestReqFromMinQueue)
   841  	}
   842  	metrics.SetDispatchMetrics(qs.qCfg.Name, qs.currentR.ToFloat(), minQueue.nextDispatchR.ToFloat(), sMin.ToFloat(), sMax.ToFloat(), dsMin.ToFloat(), dsMax.ToFloat())
   843  	return minQueue, oldestReqFromMinQueue
   844  }
   845  
   846  // finishRequestAndDispatchAsMuchAsPossible is a convenience method
   847  // which calls finishRequest for a given request and then dispatches
   848  // as many requests as possible.  This is all of what needs to be done
   849  // once a request finishes execution or is canceled.  This returns a bool
   850  // indicating whether the QueueSet is now idle.
   851  func (qs *queueSet) finishRequestAndDispatchAsMuchAsPossible(req *request) bool {
   852  	qs.lockAndSyncTime(req.ctx)
   853  	defer qs.lock.Unlock()
   854  
   855  	qs.finishRequestLocked(req)
   856  	qs.dispatchAsMuchAsPossibleLocked()
   857  	return qs.isIdleLocked()
   858  }
   859  
   860  // finishRequestLocked is a callback that should be used when a
   861  // previously dispatched request has completed its service.  This
   862  // callback updates important state in the queueSet
   863  func (qs *queueSet) finishRequestLocked(r *request) {
   864  	now := qs.clock.Now()
   865  	qs.totRequestsExecuting--
   866  	metrics.AddRequestsExecuting(r.ctx, qs.qCfg.Name, r.fsName, -1)
   867  	qs.reqsGaugePair.RequestsExecuting.Add(-1)
   868  
   869  	actualServiceDuration := now.Sub(r.startTime)
   870  
   871  	// TODO: for now we keep the logic localized so it is easier to see
   872  	//  how the counters are tracked for queueset and queue, in future we
   873  	//  can refactor to move this function.
   874  	releaseSeatsLocked := func() {
   875  		defer qs.removeQueueIfEmptyLocked(r)
   876  
   877  		qs.totSeatsInUse -= r.MaxSeats()
   878  		metrics.AddSeatConcurrencyInUse(qs.qCfg.Name, r.fsName, -r.MaxSeats())
   879  		qs.execSeatsGauge.Add(-float64(r.MaxSeats()))
   880  		qs.seatDemandIntegrator.Set(float64(qs.totSeatsInUse + qs.totSeatsWaiting))
   881  		if r.queue != nil {
   882  			r.queue.seatsInUse -= r.MaxSeats()
   883  		}
   884  	}
   885  
   886  	defer func() {
   887  		klogV := klog.V(6)
   888  		if r.workEstimate.AdditionalLatency <= 0 {
   889  			// release the seats allocated to this request immediately
   890  			releaseSeatsLocked()
   891  			if !klogV.Enabled() {
   892  			} else if r.queue != nil {
   893  				klogV.Infof("QS(%s) at t=%s R=%v: request %#+v %#+v finished all use of %d seats, adjusted queue %d start R to %v due to service time %.9fs, queue will have %d requests with %#v waiting & %d requests occupying %d seats",
   894  					qs.qCfg.Name, now.Format(nsTimeFmt), qs.currentR, r.descr1, r.descr2, r.workEstimate.MaxSeats(), r.queue.index,
   895  					r.queue.nextDispatchR, actualServiceDuration.Seconds(), r.queue.requestsWaiting.Length(), r.queue.requestsWaiting.QueueSum(), r.queue.requestsExecuting.Len(), r.queue.seatsInUse)
   896  			} else {
   897  				klogV.Infof("QS(%s) at t=%s R=%v: request %#+v %#+v finished all use of %d seats, qs will have %d requests occupying %d seats", qs.qCfg.Name, now.Format(nsTimeFmt), qs.currentR, r.descr1, r.descr2, r.workEstimate.InitialSeats, qs.totRequestsExecuting, qs.totSeatsInUse)
   898  			}
   899  			return
   900  		}
   901  
   902  		additionalLatency := r.workEstimate.AdditionalLatency
   903  		if !klogV.Enabled() {
   904  		} else if r.queue != nil {
   905  			klogV.Infof("QS(%s) at t=%s R=%v: request %#+v %#+v finished main use of %d seats but lingering on %d seats for %v seconds, adjusted queue %d start R to %v due to service time %.9fs, queue will have %d requests with %#v waiting & %d requests occupying %d seats",
   906  				qs.qCfg.Name, now.Format(nsTimeFmt), qs.currentR, r.descr1, r.descr2, r.workEstimate.InitialSeats, r.workEstimate.FinalSeats, additionalLatency.Seconds(), r.queue.index,
   907  				r.queue.nextDispatchR, actualServiceDuration.Seconds(), r.queue.requestsWaiting.Length(), r.queue.requestsWaiting.QueueSum(), r.queue.requestsExecuting.Len(), r.queue.seatsInUse)
   908  		} else {
   909  			klogV.Infof("QS(%s) at t=%s R=%v: request %#+v %#+v finished main use of %d seats but lingering on %d seats for %v seconds, qs will have %d requests occupying %d seats", qs.qCfg.Name, now.Format(nsTimeFmt), qs.currentR, r.descr1, r.descr2, r.workEstimate.InitialSeats, r.workEstimate.FinalSeats, additionalLatency.Seconds(), qs.totRequestsExecuting, qs.totSeatsInUse)
   910  		}
   911  		// EventAfterDuration will execute the event func in a new goroutine,
   912  		// so the seats allocated to this request will be released after
   913  		// AdditionalLatency elapses; this ensures that the additional
   914  		// latency has no impact on the user experience.
   915  		qs.clock.EventAfterDuration(func(_ time.Time) {
   916  			qs.lockAndSyncTime(r.ctx)
   917  			defer qs.lock.Unlock()
   918  			now := qs.clock.Now()
   919  			releaseSeatsLocked()
   920  			if !klogV.Enabled() {
   921  			} else if r.queue != nil {
   922  				klogV.Infof("QS(%s) at t=%s R=%v: request %#+v %#+v finished lingering on %d seats, queue %d will have %d requests with %#v waiting & %d requests occupying %d seats",
   923  					qs.qCfg.Name, now.Format(nsTimeFmt), qs.currentR, r.descr1, r.descr2, r.workEstimate.FinalSeats, r.queue.index,
   924  					r.queue.requestsWaiting.Length(), r.queue.requestsWaiting.QueueSum(), r.queue.requestsExecuting.Len(), r.queue.seatsInUse)
   925  			} else {
   926  				klogV.Infof("QS(%s) at t=%s R=%v: request %#+v %#+v finished lingering on %d seats, qs will have %d requests occupying %d seats", qs.qCfg.Name, now.Format(nsTimeFmt), qs.currentR, r.descr1, r.descr2, r.workEstimate.FinalSeats, qs.totRequestsExecuting, qs.totSeatsInUse)
   927  			}
   928  			qs.dispatchAsMuchAsPossibleLocked()
   929  		}, additionalLatency)
   930  	}()
   931  
   932  	if r.queue != nil {
   933  		// request has finished, remove from requests executing
   934  		r.queue.requestsExecuting = r.queue.requestsExecuting.Delete(r)
   935  
   936  		// When a request finishes being served, and the actual service time was S,
   937  		// the queue’s start R is decremented by (G - S)*width.
   938  		r.queue.nextDispatchR -= fqrequest.SeatsTimesDuration(float64(r.InitialSeats()), qs.estimatedServiceDuration-actualServiceDuration)
   939  		qs.boundNextDispatchLocked(r.queue)
   940  	} else {
   941  		qs.requestsExecutingSet = qs.requestsExecutingSet.Delete(r)
   942  	}
   943  }
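
        // A worked example of the start-R adjustment above (made-up numbers): with
        // estimatedServiceDuration G = 3ms, actual service time S = 5ms, and
        // InitialSeats = 2, nextDispatchR changes by -(G-S)*2 = +4 seat-milliseconds,
        // charging the queue for the 2ms of extra service it received on 2 seats.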
   944  
   945  // boundNextDispatchLocked applies the anti-windup hack.
   946  // We need a hack because all non-empty queues are allocated the same
   947  // number of seats.  A queue that cannot use all those seats and does
   948  // not go empty accumulates a progressively earlier `nextDispatchR` compared
   949  // to queues that are using more than they are allocated.
   950  // The following hack addresses the first side of that inequity,
   951  // by insisting that dispatch in the virtual world not precede arrival.
   952  func (qs *queueSet) boundNextDispatchLocked(queue *queue) {
   953  	oldestReqFromMinQueue, _ := queue.requestsWaiting.Peek()
   954  	if oldestReqFromMinQueue == nil {
   955  		return
   956  	}
   957  	var virtualStartBound = oldestReqFromMinQueue.arrivalR
   958  	if queue.nextDispatchR < virtualStartBound {
   959  		if klogV := klog.V(4); klogV.Enabled() {
   960  			klogV.InfoS("AntiWindup tweaked queue", "QS", qs.qCfg.Name, "queue", queue.index, "time", qs.clock.Now().Format(nsTimeFmt), "requestDescr1", oldestReqFromMinQueue.descr1, "requestDescr2", oldestReqFromMinQueue.descr2, "newVirtualStart", virtualStartBound, "deltaVirtualStart", (virtualStartBound - queue.nextDispatchR))
   961  		}
   962  		queue.nextDispatchR = virtualStartBound
   963  	}
   964  }
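
        // For example (made-up values): if a queue's nextDispatchR has drifted down
        // to R=90 seat-seconds while its oldest waiting request arrived at R=100,
        // the clamp above raises nextDispatchR to 100, so the queue cannot bank its
        // unused allocation as a 10 seat-second head start.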
   965  
   966  func (qs *queueSet) removeQueueIfEmptyLocked(r *request) {
   967  	if r.queue == nil {
   968  		return
   969  	}
   970  
   971  	// If there are more queues than desired and this one has no
   972  	// requests then remove it
   973  	if len(qs.queues) > qs.qCfg.DesiredNumQueues &&
   974  		r.queue.requestsWaiting.Length() == 0 &&
   975  		r.queue.requestsExecuting.Len() == 0 {
   976  		qs.queues = removeQueueAndUpdateIndexes(qs.queues, r.queue.index)
   977  
   978  		// decrement here to maintain the invariant that (qs.robinIndex+1) % numQueues
   979  		// is the index of the next queue after the one last dispatched from
   980  		if qs.robinIndex >= r.queue.index {
   981  			qs.robinIndex--
   982  		}
   983  	}
   984  }
   985  
   986  // removeQueueAndUpdateIndexes uses reslicing to remove an index from a slice
   987  // and then updates the 'index' field of the queues to be correct
   988  func removeQueueAndUpdateIndexes(queues []*queue, index int) []*queue {
   989  	keptQueues := append(queues[:index], queues[index+1:]...)
   990  	for i := index; i < len(keptQueues); i++ {
   991  		keptQueues[i].index--
   992  	}
   993  	return keptQueues
   994  }
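
        // For example, removing index 1 from queues whose index fields are [0 1 2 3]
        // keeps the queues formerly at 0, 2, and 3 and decrements the last two,
        // leaving index fields [0 1 2].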
   995  
   996  func (qs *queueSet) Dump(includeRequestDetails bool) debug.QueueSetDump {
   997  	qs.lock.Lock()
   998  	defer qs.lock.Unlock()
   999  	d := debug.QueueSetDump{
  1000  		Queues:                     make([]debug.QueueDump, len(qs.queues)),
  1001  		QueuelessExecutingRequests: SetMapReduce(dumpRequest(includeRequestDetails), append1[debug.RequestDump])(qs.requestsExecutingSet),
  1002  		Waiting:                    qs.totRequestsWaiting,
  1003  		Executing:                  qs.totRequestsExecuting,
  1004  		SeatsInUse:                 qs.totSeatsInUse,
  1005  		SeatsWaiting:               qs.totSeatsWaiting,
  1006  		Dispatched:                 qs.totRequestsDispatched,
  1007  		Rejected:                   qs.totRequestsRejected,
  1008  		Timedout:                   qs.totRequestsTimedout,
  1009  		Cancelled:                  qs.totRequestsCancelled,
  1010  	}
  1011  	for i, q := range qs.queues {
  1012  		d.Queues[i] = q.dumpLocked(includeRequestDetails)
  1013  	}
  1014  	return d
  1015  }
  1016  
  1017  func OnRequestDispatched(r fq.Request) {
  1018  	req, ok := r.(*request)
  1019  	if !ok {
  1020  		return
  1021  	}
  1022  
  1023  	qs := req.qs
  1024  	if qs != nil {
  1025  		qs.lock.Lock()
  1026  		defer qs.lock.Unlock()
  1027  		qs.totRequestsDispatched++
  1028  	}
  1029  }