github.com/kubeflow/training-operator@v1.7.0/pkg/controller.v1/expectation/expectation.go (about) 1 /* 2 Copyright 2023 The Kubeflow Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package expectation 18 19 import ( 20 "fmt" 21 "sync/atomic" 22 "time" 23 24 log "github.com/sirupsen/logrus" 25 26 "k8s.io/client-go/tools/cache" 27 "k8s.io/utils/clock" 28 ) 29 30 const ( 31 // If a watch drops a delete event for a pod, it'll take this long 32 // before a dormant controller waiting for those packets is woken up anyway. It is 33 // specifically targeted at the case where some problem prevents an update 34 // of expectations, without it the controller could stay asleep forever. This should 35 // be set based on the expected latency of watch events. 36 // 37 // Currently a controller can service (create *and* observe the watch events for said 38 // creation) about 10 pods a second, so it takes about 1 min to service 39 // 500 pods. Just creation is limited to 20qps, and watching happens with ~10-30s 40 // latency/pod at the scale of 3000 pods over 100 nodes. 41 ExpectationsTimeout = 5 * time.Minute 42 ) 43 44 // Expectations are a way for controllers to tell the controller manager what they expect. eg: 45 // ControllerExpectations: { 46 // controller1: expects 2 adds in 2 minutes 47 // controller2: expects 2 dels in 2 minutes 48 // controller3: expects -1 adds in 2 minutes => controller3's expectations have already been met 49 // } 50 // 51 // Implementation: 52 // ControlleeExpectation = pair of atomic counters to track controllee's creation/deletion 53 // ControllerExpectationsStore = TTLStore + a ControlleeExpectation per controller 54 // 55 // * Once set expectations can only be lowered 56 // * A controller isn't synced till its expectations are either fulfilled, or expire 57 // * Controllers that don't set expectations will get woken up for every matching controllee 58 59 // ExpKeyFunc to parse out the key from a ControlleeExpectation 60 var ExpKeyFunc = func(obj interface{}) (string, error) { 61 if e, ok := obj.(*ControlleeExpectations); ok { 62 return e.key, nil 63 } 64 return "", fmt.Errorf("could not find key for obj %#v", obj) 65 } 66 67 // ControllerExpectationsInterface is an interface that allows users to set and wait on expectations. 68 // Only abstracted out for testing. 69 // Warning: if using KeyFunc it is not safe to use a single ControllerExpectationsInterface with different 70 // types of controllers, because the keys might conflict across types. 71 type ControllerExpectationsInterface interface { 72 GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error) 73 SatisfiedExpectations(controllerKey string) bool 74 DeleteExpectations(controllerKey string) 75 SetExpectations(controllerKey string, add, del int) error 76 ExpectCreations(controllerKey string, adds int) error 77 ExpectDeletions(controllerKey string, dels int) error 78 CreationObserved(controllerKey string) 79 DeletionObserved(controllerKey string) 80 RaiseExpectations(controllerKey string, add, del int) 81 LowerExpectations(controllerKey string, add, del int) 82 } 83 84 // ControllerExpectations is a cache mapping controllers to what they expect to see before being woken up for a sync. 85 type ControllerExpectations struct { 86 cache.Store 87 } 88 89 // GetExpectations returns the ControlleeExpectations of the given controller. 90 func (r *ControllerExpectations) GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error) { 91 exp, exists, err := r.GetByKey(controllerKey) 92 if err == nil && exists { 93 return exp.(*ControlleeExpectations), true, nil 94 } 95 return nil, false, err 96 } 97 98 // DeleteExpectations deletes the expectations of the given controller from the TTLStore. 99 func (r *ControllerExpectations) DeleteExpectations(controllerKey string) { 100 if exp, exists, err := r.GetByKey(controllerKey); err == nil && exists { 101 if err := r.Delete(exp); err != nil { 102 log.Debugf("Error deleting expectations for controller %v: %v", controllerKey, err) 103 } 104 } 105 } 106 107 // SatisfiedExpectations returns true if the required adds/dels for the given controller have been observed. 108 // Add/del counts are established by the controller at sync time, and updated as controllees are observed by the controller 109 // manager. 110 func (r *ControllerExpectations) SatisfiedExpectations(controllerKey string) bool { 111 if exp, exists, err := r.GetExpectations(controllerKey); exists { 112 if exp.Fulfilled() { 113 log.Debugf("Controller expectations fulfilled %#v", exp) 114 return true 115 } else if exp.isExpired() { 116 log.Debugf("Controller expectations expired %#v", exp) 117 return true 118 } else { 119 log.Debugf("Controller still waiting on expectations %#v", exp) 120 return false 121 } 122 } else if err != nil { 123 log.Debugf("Error encountered while checking expectations %#v, forcing sync", err) 124 } else { 125 // When a new controller is created, it doesn't have expectations. 126 // When it doesn't see expected watch events for > TTL, the expectations expire. 127 // - In this case it wakes up, creates/deletes controllees, and sets expectations again. 128 // When it has satisfied expectations and no controllees need to be created/destroyed > TTL, the expectations expire. 129 // - In this case it continues without setting expectations till it needs to create/delete controllees. 130 log.Debugf("Controller %v either never recorded expectations, or the ttl expired.", controllerKey) 131 } 132 // Trigger a sync if we either encountered and error (which shouldn't happen since we're 133 // getting from local store) or this controller hasn't established expectations. 134 return true 135 } 136 137 // TODO: Extend ExpirationCache to support explicit expiration. 138 // TODO: Make this possible to disable in tests. 139 // TODO: Support injection of clock. 140 func (exp *ControlleeExpectations) isExpired() bool { 141 return clock.RealClock{}.Since(exp.timestamp) > ExpectationsTimeout 142 } 143 144 // SetExpectations registers new expectations for the given controller. Forgets existing expectations. 145 func (r *ControllerExpectations) SetExpectations(controllerKey string, add, del int) error { 146 exp := &ControlleeExpectations{add: int64(add), del: int64(del), key: controllerKey, timestamp: clock.RealClock{}.Now()} 147 log.Debugf("Setting expectations %#v", exp) 148 return r.Add(exp) 149 } 150 151 func (r *ControllerExpectations) ExpectCreations(controllerKey string, adds int) error { 152 return r.SetExpectations(controllerKey, adds, 0) 153 } 154 155 func (r *ControllerExpectations) ExpectDeletions(controllerKey string, dels int) error { 156 return r.SetExpectations(controllerKey, 0, dels) 157 } 158 159 // Decrements the expectation counts of the given controller. 160 func (r *ControllerExpectations) LowerExpectations(controllerKey string, add, del int) { 161 if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists { 162 exp.Add(int64(-add), int64(-del)) 163 // The expectations might've been modified since the update on the previous line. 164 log.Debugf("Lowered expectations %#v", exp) 165 } 166 } 167 168 // Increments the expectation counts of the given controller. 169 func (r *ControllerExpectations) RaiseExpectations(controllerKey string, add, del int) { 170 if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists { 171 exp.Add(int64(add), int64(del)) 172 // The expectations might've been modified since the update on the previous line. 173 log.Debugf("Raised expectations %#v", exp) 174 } 175 } 176 177 // CreationObserved atomically decrements the `add` expectation count of the given controller. 178 func (r *ControllerExpectations) CreationObserved(controllerKey string) { 179 r.LowerExpectations(controllerKey, 1, 0) 180 } 181 182 // DeletionObserved atomically decrements the `del` expectation count of the given controller. 183 func (r *ControllerExpectations) DeletionObserved(controllerKey string) { 184 r.LowerExpectations(controllerKey, 0, 1) 185 } 186 187 // Expectations are either fulfilled, or expire naturally. 188 type Expectations interface { 189 Fulfilled() bool 190 } 191 192 // ControlleeExpectations track controllee creates/deletes. 193 type ControlleeExpectations struct { 194 // Important: Since these two int64 fields are using sync/atomic, they have to be at the top of the struct due to a bug on 32-bit platforms 195 // See: https://golang.org/pkg/sync/atomic/ for more information 196 add int64 197 del int64 198 key string 199 timestamp time.Time 200 } 201 202 // Add increments the add and del counters. 203 func (e *ControlleeExpectations) Add(add, del int64) { 204 atomic.AddInt64(&e.add, add) 205 atomic.AddInt64(&e.del, del) 206 } 207 208 // Fulfilled returns true if this expectation has been fulfilled. 209 func (e *ControlleeExpectations) Fulfilled() bool { 210 // TODO: think about why this line being atomic doesn't matter 211 return atomic.LoadInt64(&e.add) <= 0 && atomic.LoadInt64(&e.del) <= 0 212 } 213 214 // GetExpectations returns the add and del expectations of the controllee. 215 func (e *ControlleeExpectations) GetExpectations() (int64, int64) { 216 return atomic.LoadInt64(&e.add), atomic.LoadInt64(&e.del) 217 } 218 219 // NewControllerExpectations returns a store for ControllerExpectations. 220 func NewControllerExpectations() *ControllerExpectations { 221 return &ControllerExpectations{cache.NewStore(ExpKeyFunc)} 222 }