k8s.io/apiserver@v0.31.1/pkg/util/flowcontrol/request/object_count_tracker.go (about) 1 /* 2 Copyright 2021 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package request 18 19 import ( 20 "errors" 21 "sync" 22 "time" 23 24 "k8s.io/apimachinery/pkg/util/wait" 25 "k8s.io/klog/v2" 26 "k8s.io/utils/clock" 27 ) 28 29 const ( 30 // type deletion (it applies mostly to CRD) is not a very frequent 31 // operation so we can afford to prune the cache at a large interval. 32 // at the same time, we also want to make sure that the scalability 33 // tests hit this code path. 34 pruneInterval = 1 * time.Hour 35 36 // the storage layer polls for object count at every 1m interval, we will allow 37 // up to 2-3 transient failures to get the latest count for a given resource. 38 staleTolerationThreshold = 3 * time.Minute 39 ) 40 41 var ( 42 // ObjectCountNotFoundErr is returned when the object count for 43 // a given resource is not being tracked. 44 ObjectCountNotFoundErr = errors.New("object count not found for the given resource") 45 46 // ObjectCountStaleErr is returned when the object count for a 47 // given resource has gone stale due to transient failures. 48 ObjectCountStaleErr = errors.New("object count has gone stale for the given resource") 49 ) 50 51 // StorageObjectCountTracker is an interface that is used to keep track of 52 // of the total number of objects for each resource. 53 // {group}.{resource} is used as the key name to update and retrieve 54 // the total number of objects for a given resource. 55 type StorageObjectCountTracker interface { 56 // Set is invoked to update the current number of total 57 // objects for the given resource 58 Set(string, int64) 59 60 // Get returns the total number of objects for the given resource. 61 // The following errors are returned: 62 // - if the count has gone stale for a given resource due to transient 63 // failures ObjectCountStaleErr is returned. 64 // - if the given resource is not being tracked then 65 // ObjectCountNotFoundErr is returned. 66 Get(string) (int64, error) 67 68 // RunUntil starts all the necessary maintenance. 69 RunUntil(stopCh <-chan struct{}) 70 } 71 72 // NewStorageObjectCountTracker returns an instance of 73 // StorageObjectCountTracker interface that can be used to 74 // keep track of the total number of objects for each resource. 75 func NewStorageObjectCountTracker() StorageObjectCountTracker { 76 return &objectCountTracker{ 77 clock: &clock.RealClock{}, 78 counts: map[string]*timestampedCount{}, 79 } 80 } 81 82 // timestampedCount stores the count of a given resource with a last updated 83 // timestamp so we can prune it after it goes stale for certain threshold. 84 type timestampedCount struct { 85 count int64 86 lastUpdatedAt time.Time 87 } 88 89 // objectCountTracker implements StorageObjectCountTracker with 90 // reader/writer mutual exclusion lock. 91 type objectCountTracker struct { 92 clock clock.PassiveClock 93 94 lock sync.RWMutex 95 counts map[string]*timestampedCount 96 } 97 98 func (t *objectCountTracker) Set(groupResource string, count int64) { 99 if count <= -1 { 100 // a value of -1 indicates that the 'Count' call failed to contact 101 // the storage layer, in most cases this error can be transient. 102 // we will continue to work with the count that is in the cache 103 // up to a certain threshold defined by staleTolerationThreshold. 104 // in case this becomes a non transient error then the count for 105 // the given resource will will eventually be removed from 106 // the cache by the pruner. 107 return 108 } 109 110 now := t.clock.Now() 111 112 // lock for writing 113 t.lock.Lock() 114 defer t.lock.Unlock() 115 116 if item, ok := t.counts[groupResource]; ok { 117 item.count = count 118 item.lastUpdatedAt = now 119 return 120 } 121 122 t.counts[groupResource] = ×tampedCount{ 123 count: count, 124 lastUpdatedAt: now, 125 } 126 } 127 128 func (t *objectCountTracker) Get(groupResource string) (int64, error) { 129 staleThreshold := t.clock.Now().Add(-staleTolerationThreshold) 130 131 t.lock.RLock() 132 defer t.lock.RUnlock() 133 134 if item, ok := t.counts[groupResource]; ok { 135 if item.lastUpdatedAt.Before(staleThreshold) { 136 return item.count, ObjectCountStaleErr 137 } 138 return item.count, nil 139 } 140 return 0, ObjectCountNotFoundErr 141 } 142 143 // RunUntil runs all the necessary maintenance. 144 func (t *objectCountTracker) RunUntil(stopCh <-chan struct{}) { 145 wait.PollUntil( 146 pruneInterval, 147 func() (bool, error) { 148 // always prune at every pruneInterval 149 return false, t.prune(pruneInterval) 150 }, stopCh) 151 klog.InfoS("StorageObjectCountTracker pruner is exiting") 152 } 153 154 func (t *objectCountTracker) prune(threshold time.Duration) error { 155 oldestLastUpdatedAtAllowed := t.clock.Now().Add(-threshold) 156 157 // lock for writing 158 t.lock.Lock() 159 defer t.lock.Unlock() 160 161 for groupResource, count := range t.counts { 162 if count.lastUpdatedAt.After(oldestLastUpdatedAtAllowed) { 163 continue 164 } 165 delete(t.counts, groupResource) 166 } 167 168 return nil 169 }