k8s.io/client-go@v0.22.2/tools/leaderelection/leaderelection.go

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package leaderelection implements leader election of a set of endpoints.
// It uses an annotation in the endpoints object to store the record of the
// election state. This implementation does not guarantee that only one
// client is acting as a leader (a.k.a. fencing).
//
// A client only acts on timestamps captured locally to infer the state of the
// leader election. The client does not consider timestamps in the leader
// election record to be accurate because these timestamps may not have been
// produced by a local clock. The implementation does not depend on their
// accuracy and only uses their change to indicate that another client has
// renewed the leader lease. Thus the implementation is tolerant to arbitrary
// clock skew, but is not tolerant to arbitrary clock skew rate.
//
// However the level of tolerance to skew rate can be configured by setting
// RenewDeadline and LeaseDuration appropriately. The tolerance expressed as a
// maximum tolerated ratio of time passed on the fastest node to time passed on
// the slowest node can be approximately achieved with a configuration that sets
// the same ratio of LeaseDuration to RenewDeadline. For example if a user wanted
// to tolerate some nodes progressing forward in time twice as fast as other nodes,
// the user could set LeaseDuration to 60 seconds and RenewDeadline to 30 seconds.
//
// While not required, some method of clock synchronization between nodes in the
// cluster is highly recommended. It's important to keep in mind when configuring
// this client that the tolerance to skew rate varies inversely to master
// availability.
//
// Larger clusters often have a more lenient SLA for API latency. This should be
// taken into account when configuring the client. The rate of leader transitions
// should be monitored and RetryPeriod and LeaseDuration should be increased
// until the rate is stable and acceptably low. It's important to keep in mind
// when configuring this client that the tolerance to API latency varies inversely
// to master availability.
//
// DISCLAIMER: this is an alpha API. This library will likely change significantly
// or even be removed entirely in subsequent releases. Depend on this API at
// your own risk.
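//
// The sketch below shows one way the durations and callbacks described above
// are typically wired together. It is illustrative only: "clientset" (a
// kubernetes.Interface), "ctx", and the lock name, namespace, and identity are
// placeholder assumptions supplied by the caller, not part of this package.
//
//	lock := &resourcelock.LeaseLock{
//		LeaseMeta:  metav1.ObjectMeta{Name: "example-lock", Namespace: "default"},
//		Client:     clientset.CoordinationV1(),
//		LockConfig: resourcelock.ResourceLockConfig{Identity: "example-identity"},
//	}
//	leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{
//		Lock:            lock,
//		ReleaseOnCancel: true,
//		LeaseDuration:   15 * time.Second,
//		RenewDeadline:   10 * time.Second,
//		RetryPeriod:     2 * time.Second,
//		Callbacks: leaderelection.LeaderCallbacks{
//			OnStartedLeading: func(ctx context.Context) { /* begin work guarded by the lease */ },
//			OnStoppedLeading: func() { /* stop all work guarded by the lease */ },
//		},
//	})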
package leaderelection

import (
	"bytes"
	"context"
	"fmt"
	"sync"
	"time"

	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/clock"
	"k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	rl "k8s.io/client-go/tools/leaderelection/resourcelock"

	"k8s.io/klog/v2"
)

const (
	JitterFactor = 1.2
)

// NewLeaderElector creates a LeaderElector from a LeaderElectionConfig
func NewLeaderElector(lec LeaderElectionConfig) (*LeaderElector, error) {
	if lec.LeaseDuration <= lec.RenewDeadline {
		return nil, fmt.Errorf("leaseDuration must be greater than renewDeadline")
	}
	if lec.RenewDeadline <= time.Duration(JitterFactor*float64(lec.RetryPeriod)) {
		return nil, fmt.Errorf("renewDeadline must be greater than retryPeriod*JitterFactor")
	}
	if lec.LeaseDuration < 1 {
		return nil, fmt.Errorf("leaseDuration must be greater than zero")
	}
	if lec.RenewDeadline < 1 {
		return nil, fmt.Errorf("renewDeadline must be greater than zero")
	}
	if lec.RetryPeriod < 1 {
		return nil, fmt.Errorf("retryPeriod must be greater than zero")
	}
	if lec.Callbacks.OnStartedLeading == nil {
		return nil, fmt.Errorf("OnStartedLeading callback must not be nil")
	}
	if lec.Callbacks.OnStoppedLeading == nil {
		return nil, fmt.Errorf("OnStoppedLeading callback must not be nil")
	}

	if lec.Lock == nil {
		return nil, fmt.Errorf("Lock must not be nil.")
	}
	le := LeaderElector{
		config:  lec,
		clock:   clock.RealClock{},
		metrics: globalMetricsFactory.newLeaderMetrics(),
	}
	le.metrics.leaderOff(le.config.Name)
	return &le, nil
}

type LeaderElectionConfig struct {
	// Lock is the resource that will be used for locking
	Lock rl.Interface

	// LeaseDuration is the duration that non-leader candidates will
	// wait to force acquire leadership. This is measured against the time of
	// the last observed ack.
	//
	// A client needs to wait a full LeaseDuration without observing a change to
	// the record before it can attempt to take over. When all clients are
	// shutdown and a new set of clients are started with different names against
	// the same leader record, they must wait the full LeaseDuration before
	// attempting to acquire the lease. Thus LeaseDuration should be as short as
	// possible (within your tolerance for clock skew rate) to avoid possible
	// long waits in that scenario.
	//
	// Core clients default this value to 15 seconds.
	LeaseDuration time.Duration
	// RenewDeadline is the duration that the acting master will retry
	// refreshing leadership before giving up.
	//
	// Core clients default this value to 10 seconds.
	RenewDeadline time.Duration
	// RetryPeriod is the duration the LeaderElector clients should wait
	// between tries of actions.
	//
	// Core clients default this value to 2 seconds.
	RetryPeriod time.Duration

	// Callbacks are callbacks that are triggered during certain lifecycle
	// events of the LeaderElector
	Callbacks LeaderCallbacks

	// WatchDog is the associated health checker.
	// WatchDog may be nil if it is not needed/configured.
	WatchDog *HealthzAdaptor

	// ReleaseOnCancel should be set true if the lock should be released
	// when the run context is cancelled. If you set this to true, you must
	// ensure all code guarded by this lease has successfully completed
	// prior to cancelling the context, or you may have two processes
	// simultaneously acting on the critical path.
	ReleaseOnCancel bool

	// Name is the name of the resource lock for debugging
	Name string
}

// LeaderCallbacks are callbacks that are triggered during certain
// lifecycle events of the LeaderElector. These are invoked asynchronously.
//
// possible future callbacks:
//  * OnChallenge()
type LeaderCallbacks struct {
	// OnStartedLeading is called when a LeaderElector client starts leading
	OnStartedLeading func(context.Context)
	// OnStoppedLeading is called when a LeaderElector client stops leading
	OnStoppedLeading func()
	// OnNewLeader is called when the client observes a leader that is
	// not the previously observed leader. This includes the first observed
	// leader when the client starts.
	OnNewLeader func(identity string)
}

// LeaderElector is a leader election client.
type LeaderElector struct {
	config LeaderElectionConfig
	// internal bookkeeping
	observedRecord    rl.LeaderElectionRecord
	observedRawRecord []byte
	observedTime      time.Time
	// used to implement OnNewLeader(), may lag slightly from the
	// value observedRecord.HolderIdentity if the transition has
	// not yet been reported.
	reportedLeader string

	// clock is a wrapper around time to allow for less flaky testing
	clock clock.Clock

	// used to lock the observedRecord
	observedRecordLock sync.Mutex

	metrics leaderMetricsAdapter
}

// Run starts the leader election loop. Run will not return
// before the leader election loop is stopped by ctx or it has
// stopped holding the leader lease.
func (le *LeaderElector) Run(ctx context.Context) {
	defer runtime.HandleCrash()
	defer func() {
		le.config.Callbacks.OnStoppedLeading()
	}()

	if !le.acquire(ctx) {
		return // ctx signalled done
	}
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	go le.config.Callbacks.OnStartedLeading(ctx)
	le.renew(ctx)
}

// RunOrDie starts a client with the provided config or panics if the config
// fails to validate. RunOrDie blocks until the leader election loop is
// stopped by ctx or it has stopped holding the leader lease.
func RunOrDie(ctx context.Context, lec LeaderElectionConfig) {
	le, err := NewLeaderElector(lec)
	if err != nil {
		panic(err)
	}
	if lec.WatchDog != nil {
		lec.WatchDog.SetLeaderElection(le)
	}
	le.Run(ctx)
}

// GetLeader returns the identity of the last observed leader, or the empty string if
// no leader has yet been observed.
// This function is for informational purposes (e.g. monitoring, logs, etc.).
func (le *LeaderElector) GetLeader() string {
	return le.getObservedRecord().HolderIdentity
}

// IsLeader returns true if the last observed leader was this client, else returns false.
func (le *LeaderElector) IsLeader() bool {
	return le.getObservedRecord().HolderIdentity == le.config.Lock.Identity()
}

// acquire loops calling tryAcquireOrRenew and returns true immediately when tryAcquireOrRenew succeeds.
// Returns false if ctx signals done.
func (le *LeaderElector) acquire(ctx context.Context) bool {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	succeeded := false
	desc := le.config.Lock.Describe()
	klog.Infof("attempting to acquire leader lease %v...", desc)
	wait.JitterUntil(func() {
		succeeded = le.tryAcquireOrRenew(ctx)
		le.maybeReportTransition()
		if !succeeded {
			klog.V(4).Infof("failed to acquire lease %v", desc)
			return
		}
		le.config.Lock.RecordEvent("became leader")
		le.metrics.leaderOn(le.config.Name)
		klog.Infof("successfully acquired lease %v", desc)
		cancel()
	}, le.config.RetryPeriod, JitterFactor, true, ctx.Done())
	return succeeded
}

// renew loops calling tryAcquireOrRenew and returns immediately when tryAcquireOrRenew fails or ctx signals done.
func (le *LeaderElector) renew(ctx context.Context) {
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	wait.Until(func() {
		timeoutCtx, timeoutCancel := context.WithTimeout(ctx, le.config.RenewDeadline)
		defer timeoutCancel()
		err := wait.PollImmediateUntil(le.config.RetryPeriod, func() (bool, error) {
			return le.tryAcquireOrRenew(timeoutCtx), nil
		}, timeoutCtx.Done())

		le.maybeReportTransition()
		desc := le.config.Lock.Describe()
		if err == nil {
			klog.V(5).Infof("successfully renewed lease %v", desc)
			return
		}
		le.config.Lock.RecordEvent("stopped leading")
		le.metrics.leaderOff(le.config.Name)
		klog.Infof("failed to renew lease %v: %v", desc, err)
		cancel()
	}, le.config.RetryPeriod, ctx.Done())

	// if we hold the lease, give it up
	if le.config.ReleaseOnCancel {
		le.release()
	}
}

// release attempts to release the leader lease if we have acquired it.
func (le *LeaderElector) release() bool {
	if !le.IsLeader() {
		return true
	}
	now := metav1.Now()
	leaderElectionRecord := rl.LeaderElectionRecord{
		LeaderTransitions:    le.observedRecord.LeaderTransitions,
		LeaseDurationSeconds: 1,
		RenewTime:            now,
		AcquireTime:          now,
	}
	if err := le.config.Lock.Update(context.TODO(), leaderElectionRecord); err != nil {
		klog.Errorf("Failed to release lock: %v", err)
		return false
	}

	le.setObservedRecord(&leaderElectionRecord)
	return true
}

// tryAcquireOrRenew tries to acquire a leader lease if it is not already acquired,
// else it tries to renew the lease if it has already been acquired. Returns true
// on success else returns false.
func (le *LeaderElector) tryAcquireOrRenew(ctx context.Context) bool {
	now := metav1.Now()
	leaderElectionRecord := rl.LeaderElectionRecord{
		HolderIdentity:       le.config.Lock.Identity(),
		LeaseDurationSeconds: int(le.config.LeaseDuration / time.Second),
		RenewTime:            now,
		AcquireTime:          now,
	}

	// 1. obtain or create the ElectionRecord
	oldLeaderElectionRecord, oldLeaderElectionRawRecord, err := le.config.Lock.Get(ctx)
	if err != nil {
		if !errors.IsNotFound(err) {
			klog.Errorf("error retrieving resource lock %v: %v", le.config.Lock.Describe(), err)
			return false
		}
		if err = le.config.Lock.Create(ctx, leaderElectionRecord); err != nil {
			klog.Errorf("error initially creating leader election record: %v", err)
			return false
		}

		le.setObservedRecord(&leaderElectionRecord)

		return true
	}

	// 2. Record obtained, check the Identity & Time
	if !bytes.Equal(le.observedRawRecord, oldLeaderElectionRawRecord) {
		le.setObservedRecord(oldLeaderElectionRecord)

		le.observedRawRecord = oldLeaderElectionRawRecord
	}
	if len(oldLeaderElectionRecord.HolderIdentity) > 0 &&
		le.observedTime.Add(le.config.LeaseDuration).After(now.Time) &&
		!le.IsLeader() {
		klog.V(4).Infof("lock is held by %v and has not yet expired", oldLeaderElectionRecord.HolderIdentity)
		return false
	}

	// 3. We're going to try to update. The leaderElectionRecord is set to its default
	// here. Let's correct it before updating.
	if le.IsLeader() {
		leaderElectionRecord.AcquireTime = oldLeaderElectionRecord.AcquireTime
		leaderElectionRecord.LeaderTransitions = oldLeaderElectionRecord.LeaderTransitions
	} else {
		leaderElectionRecord.LeaderTransitions = oldLeaderElectionRecord.LeaderTransitions + 1
	}

	// update the lock itself
	if err = le.config.Lock.Update(ctx, leaderElectionRecord); err != nil {
		klog.Errorf("Failed to update lock: %v", err)
		return false
	}

	le.setObservedRecord(&leaderElectionRecord)
	return true
}

func (le *LeaderElector) maybeReportTransition() {
	if le.observedRecord.HolderIdentity == le.reportedLeader {
		return
	}
	le.reportedLeader = le.observedRecord.HolderIdentity
	if le.config.Callbacks.OnNewLeader != nil {
		go le.config.Callbacks.OnNewLeader(le.reportedLeader)
	}
}

// Check will determine if the current lease is expired by more than
// maxTolerableExpiredLease. (A sketch of wiring this into an HTTP health
// check appears at the end of this file.)
func (le *LeaderElector) Check(maxTolerableExpiredLease time.Duration) error {
	if !le.IsLeader() {
		// Currently not concerned with the case that we are hot standby
		return nil
	}
	// If the last observed renewal is more than LeaseDuration plus
	// maxTolerableExpiredLease in the past, start reporting ourselves as
	// unhealthy. We should have died by then, but conditions like deadlock
	// can prevent this. (See #70819)
	if le.clock.Since(le.observedTime) > le.config.LeaseDuration+maxTolerableExpiredLease {
		return fmt.Errorf("failed election to renew leadership on lease %s", le.config.Name)
	}

	return nil
}

// setObservedRecord will set a new observedRecord and update observedTime to the current time.
// Protect critical sections with lock.
func (le *LeaderElector) setObservedRecord(observedRecord *rl.LeaderElectionRecord) {
	le.observedRecordLock.Lock()
	defer le.observedRecordLock.Unlock()

	le.observedRecord = *observedRecord
	le.observedTime = le.clock.Now()
}

// getObservedRecord returns the observedRecord.
// Protect critical sections with lock.
func (le *LeaderElector) getObservedRecord() rl.LeaderElectionRecord {
	le.observedRecordLock.Lock()
	defer le.observedRecordLock.Unlock()

	return le.observedRecord
}
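
// The sketch below shows one way Check can be surfaced as an HTTP health check
// via the package's HealthzAdaptor (the WatchDog field in LeaderElectionConfig).
// It is illustrative only: the handler path, timeout value, and server setup are
// assumptions made by the caller, not requirements of this package.
//
//	watchdog := leaderelection.NewLeaderHealthzAdaptor(20 * time.Second)
//	// Set LeaderElectionConfig.WatchDog to watchdog before calling RunOrDie,
//	// so RunOrDie binds it to the LeaderElector via SetLeaderElection.
//	http.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
//		if err := watchdog.Check(r); err != nil {
//			http.Error(w, err.Error(), http.StatusInternalServerError)
//			return
//		}
//		w.Write([]byte("ok"))
//	})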