k8s.io/apiserver@v0.31.1/pkg/util/flowcontrol/request/object_count_tracker.go (about)

     1  /*
     2  Copyright 2021 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package request
    18  
    19  import (
    20  	"errors"
    21  	"sync"
    22  	"time"
    23  
    24  	"k8s.io/apimachinery/pkg/util/wait"
    25  	"k8s.io/klog/v2"
    26  	"k8s.io/utils/clock"
    27  )
    28  
    29  const (
    30  	// type deletion (it applies mostly to CRD) is not a very frequent
    31  	// operation so we can afford to prune the cache at a large interval.
    32  	// at the same time, we also want to make sure that the scalability
    33  	// tests hit this code path.
    34  	pruneInterval = 1 * time.Hour
    35  
    36  	// the storage layer polls for object count at every 1m interval, we will allow
    37  	// up to 2-3 transient failures to get the latest count for a given resource.
    38  	staleTolerationThreshold = 3 * time.Minute
    39  )
    40  
var (
	// ObjectCountNotFoundErr is returned when the object count for
	// a given resource is not being tracked, i.e. Get finds no entry
	// in the cache for the requested key.
	ObjectCountNotFoundErr = errors.New("object count not found for the given resource")

	// ObjectCountStaleErr is returned when the object count for a
	// given resource has gone stale due to transient failures, i.e. the
	// entry was last updated more than staleTolerationThreshold ago.
	// Get returns the last cached count alongside this error.
	ObjectCountStaleErr = errors.New("object count has gone stale for the given resource")
)
    50  
    51  // StorageObjectCountTracker is an interface that is used to keep track of
    52  // of the total number of objects for each resource.
    53  // {group}.{resource} is used as the key name to update and retrieve
    54  // the total number of objects for a given resource.
    55  type StorageObjectCountTracker interface {
    56  	// Set is invoked to update the current number of total
    57  	// objects for the given resource
    58  	Set(string, int64)
    59  
    60  	// Get returns the total number of objects for the given resource.
    61  	// The following errors are returned:
    62  	//  - if the count has gone stale for a given resource due to transient
    63  	//    failures ObjectCountStaleErr is returned.
    64  	//  - if the given resource is not being tracked then
    65  	//    ObjectCountNotFoundErr is returned.
    66  	Get(string) (int64, error)
    67  
    68  	// RunUntil starts all the necessary maintenance.
    69  	RunUntil(stopCh <-chan struct{})
    70  }
    71  
    72  // NewStorageObjectCountTracker returns an instance of
    73  // StorageObjectCountTracker interface that can be used to
    74  // keep track of the total number of objects for each resource.
    75  func NewStorageObjectCountTracker() StorageObjectCountTracker {
    76  	return &objectCountTracker{
    77  		clock:  &clock.RealClock{},
    78  		counts: map[string]*timestampedCount{},
    79  	}
    80  }
    81  
// timestampedCount stores the count of a given resource together with the
// time it was last updated, so that the entry can be reported as stale and
// eventually pruned once it has not been refreshed for a certain threshold.
type timestampedCount struct {
	// count is the most recently recorded number of objects.
	count         int64
	// lastUpdatedAt is the time at which count was last recorded.
	lastUpdatedAt time.Time
}
    88  
// objectCountTracker implements StorageObjectCountTracker with
// reader/writer mutual exclusion lock.
type objectCountTracker struct {
	// clock supplies the current time used to stamp entries and to
	// decide whether an entry is stale or due for pruning.
	clock clock.PassiveClock

	// lock guards counts: Set and prune take it for writing,
	// Get takes it for reading.
	lock   sync.RWMutex
	// counts maps a resource key to its latest count and update time.
	counts map[string]*timestampedCount
}
    97  
    98  func (t *objectCountTracker) Set(groupResource string, count int64) {
    99  	if count <= -1 {
   100  		// a value of -1 indicates that the 'Count' call failed to contact
   101  		// the storage layer, in most cases this error can be transient.
   102  		// we will continue to work with the count that is in the cache
   103  		// up to a certain threshold defined by staleTolerationThreshold.
   104  		// in case this becomes a non transient error then the count for
   105  		// the given resource will will eventually be removed from
   106  		// the cache by the pruner.
   107  		return
   108  	}
   109  
   110  	now := t.clock.Now()
   111  
   112  	// lock for writing
   113  	t.lock.Lock()
   114  	defer t.lock.Unlock()
   115  
   116  	if item, ok := t.counts[groupResource]; ok {
   117  		item.count = count
   118  		item.lastUpdatedAt = now
   119  		return
   120  	}
   121  
   122  	t.counts[groupResource] = &timestampedCount{
   123  		count:         count,
   124  		lastUpdatedAt: now,
   125  	}
   126  }
   127  
   128  func (t *objectCountTracker) Get(groupResource string) (int64, error) {
   129  	staleThreshold := t.clock.Now().Add(-staleTolerationThreshold)
   130  
   131  	t.lock.RLock()
   132  	defer t.lock.RUnlock()
   133  
   134  	if item, ok := t.counts[groupResource]; ok {
   135  		if item.lastUpdatedAt.Before(staleThreshold) {
   136  			return item.count, ObjectCountStaleErr
   137  		}
   138  		return item.count, nil
   139  	}
   140  	return 0, ObjectCountNotFoundErr
   141  }
   142  
   143  // RunUntil runs all the necessary maintenance.
   144  func (t *objectCountTracker) RunUntil(stopCh <-chan struct{}) {
   145  	wait.PollUntil(
   146  		pruneInterval,
   147  		func() (bool, error) {
   148  			// always prune at every pruneInterval
   149  			return false, t.prune(pruneInterval)
   150  		}, stopCh)
   151  	klog.InfoS("StorageObjectCountTracker pruner is exiting")
   152  }
   153  
   154  func (t *objectCountTracker) prune(threshold time.Duration) error {
   155  	oldestLastUpdatedAtAllowed := t.clock.Now().Add(-threshold)
   156  
   157  	// lock for writing
   158  	t.lock.Lock()
   159  	defer t.lock.Unlock()
   160  
   161  	for groupResource, count := range t.counts {
   162  		if count.lastUpdatedAt.After(oldestLastUpdatedAtAllowed) {
   163  			continue
   164  		}
   165  		delete(t.counts, groupResource)
   166  	}
   167  
   168  	return nil
   169  }