k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/controller_utils.go

/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"context"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"hash/fnv"
	"math"
	"sync"
	"sync/atomic"
	"time"

	apps "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/rand"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/strategicpatch"
	"k8s.io/apimachinery/pkg/util/wait"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	clientretry "k8s.io/client-go/util/retry"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/pkg/apis/core/helper"
	_ "k8s.io/kubernetes/pkg/apis/core/install"
	"k8s.io/kubernetes/pkg/apis/core/validation"
	"k8s.io/kubernetes/pkg/features"
	hashutil "k8s.io/kubernetes/pkg/util/hash"
	taintutils "k8s.io/kubernetes/pkg/util/taints"
	"k8s.io/utils/clock"

	"k8s.io/klog/v2"
)

const (
	// If a watch drops a delete event for a pod, it'll take this long
	// before a dormant controller waiting for those packets is woken up anyway. It is
	// specifically targeted at the case where some problem prevents an update
	// of expectations, without it the controller could stay asleep forever. This should
	// be set based on the expected latency of watch events.
	//
	// Currently a controller can service (create *and* observe the watch events for said
	// creation) about 10 pods a second, so it takes about 1 min to service
	// 500 pods. Just creation is limited to 20qps, and watching happens with ~10-30s
	// latency/pod at the scale of 3000 pods over 100 nodes.
	ExpectationsTimeout = 5 * time.Minute
	// When batching pod creates, SlowStartInitialBatchSize is the size of the
	// initial batch. The size of each successive batch is twice the size of
	// the previous batch. For example, for a value of 1, batch sizes would be
	// 1, 2, 4, 8, ... and for a value of 10, batch sizes would be
	// 10, 20, 40, 80, ... Setting the value higher means that quota denials
	// will result in more doomed API calls and associated event spam. Setting
	// the value lower will result in more API call round trip periods for
	// large batches.
	//
	// Given a number of pods to start "N":
	// The number of doomed calls per sync once quota is exceeded is given by:
	//      min(N,SlowStartInitialBatchSize)
	// The number of batches is given by:
	//      1+floor(log_2(ceil(N/SlowStartInitialBatchSize)))
	SlowStartInitialBatchSize = 1
)
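// slowStartBatchSizesSketch is an illustrative sketch, not part of the original file: it only
// enumerates the batch sizes implied by SlowStartInitialBatchSize for N pods, matching the
// formulas above (the real slow-start batching lives in the individual controllers). For N=13
// and an initial batch size of 1 it yields 1, 2, 4, 6, i.e. 1+floor(log_2(ceil(13/1))) = 4 batches.
func slowStartBatchSizesSketch(n int) []int {
	var sizes []int
	for remaining, batch := n, SlowStartInitialBatchSize; remaining > 0; {
		if batch > remaining {
			// The last batch is capped by the number of pods still to create.
			batch = remaining
		}
		sizes = append(sizes, batch)
		remaining -= batch
		batch *= 2
	}
	return sizes
}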
var UpdateTaintBackoff = wait.Backoff{
	Steps:    5,
	Duration: 100 * time.Millisecond,
	Jitter:   1.0,
}

var UpdateLabelBackoff = wait.Backoff{
	Steps:    5,
	Duration: 100 * time.Millisecond,
	Jitter:   1.0,
}

var (
	KeyFunc           = cache.DeletionHandlingMetaNamespaceKeyFunc
	podPhaseToOrdinal = map[v1.PodPhase]int{v1.PodPending: 0, v1.PodUnknown: 1, v1.PodRunning: 2}
)

type ResyncPeriodFunc func() time.Duration

// Returns 0 for resyncPeriod in case resyncing is not needed.
func NoResyncPeriodFunc() time.Duration {
	return 0
}

// StaticResyncPeriodFunc returns the resync period specified
func StaticResyncPeriodFunc(resyncPeriod time.Duration) ResyncPeriodFunc {
	return func() time.Duration {
		return resyncPeriod
	}
}

// Expectations are a way for controllers to tell the controller manager what they expect. eg:
//	ControllerExpectations: {
//		controller1: expects 2 adds in 2 minutes
//		controller2: expects 2 dels in 2 minutes
//		controller3: expects -1 adds in 2 minutes => controller3's expectations have already been met
//	}
//
// Implementation:
//	ControlleeExpectation = pair of atomic counters to track controllee's creation/deletion
//	ControllerExpectationsStore = TTLStore + a ControlleeExpectation per controller
//
// * Once set expectations can only be lowered
// * A controller isn't synced till its expectations are either fulfilled, or expire
// * Controllers that don't set expectations will get woken up for every matching controllee

// ExpKeyFunc to parse out the key from a ControlleeExpectation
var ExpKeyFunc = func(obj interface{}) (string, error) {
	if e, ok := obj.(*ControlleeExpectations); ok {
		return e.key, nil
	}
	return "", fmt.Errorf("could not find key for obj %#v", obj)
}

// ControllerExpectationsInterface is an interface that allows users to set and wait on expectations.
// Only abstracted out for testing.
// Warning: if using KeyFunc it is not safe to use a single ControllerExpectationsInterface with different
// types of controllers, because the keys might conflict across types.
type ControllerExpectationsInterface interface {
	GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error)
	SatisfiedExpectations(logger klog.Logger, controllerKey string) bool
	DeleteExpectations(logger klog.Logger, controllerKey string)
	SetExpectations(logger klog.Logger, controllerKey string, add, del int) error
	ExpectCreations(logger klog.Logger, controllerKey string, adds int) error
	ExpectDeletions(logger klog.Logger, controllerKey string, dels int) error
	CreationObserved(logger klog.Logger, controllerKey string)
	DeletionObserved(logger klog.Logger, controllerKey string)
	RaiseExpectations(logger klog.Logger, controllerKey string, add, del int)
	LowerExpectations(logger klog.Logger, controllerKey string, add, del int)
}

// ControllerExpectations is a cache mapping controllers to what they expect to see before being woken up for a sync.
type ControllerExpectations struct {
	cache.Store
}

// GetExpectations returns the ControlleeExpectations of the given controller.
func (r *ControllerExpectations) GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error) {
	exp, exists, err := r.GetByKey(controllerKey)
	if err == nil && exists {
		return exp.(*ControlleeExpectations), true, nil
	}
	return nil, false, err
}

// DeleteExpectations deletes the expectations of the given controller from the TTLStore.
func (r *ControllerExpectations) DeleteExpectations(logger klog.Logger, controllerKey string) {
	if exp, exists, err := r.GetByKey(controllerKey); err == nil && exists {
		if err := r.Delete(exp); err != nil {
			logger.V(2).Info("Error deleting expectations", "controller", controllerKey, "err", err)
		}
	}
}

// SatisfiedExpectations returns true if the required adds/dels for the given controller have been observed.
// Add/del counts are established by the controller at sync time, and updated as controllees are observed by the controller
// manager.
func (r *ControllerExpectations) SatisfiedExpectations(logger klog.Logger, controllerKey string) bool {
	if exp, exists, err := r.GetExpectations(controllerKey); exists {
		if exp.Fulfilled() {
			logger.V(4).Info("Controller expectations fulfilled", "expectations", exp)
			return true
		} else if exp.isExpired() {
			logger.V(4).Info("Controller expectations expired", "expectations", exp)
			return true
		} else {
			logger.V(4).Info("Controller still waiting on expectations", "expectations", exp)
			return false
		}
	} else if err != nil {
		logger.V(2).Info("Error encountered while checking expectations, forcing sync", "err", err)
	} else {
		// When a new controller is created, it doesn't have expectations.
		// When it doesn't see expected watch events for > TTL, the expectations expire.
		//   - In this case it wakes up, creates/deletes controllees, and sets expectations again.
		// When it has satisfied expectations and no controllees need to be created/destroyed > TTL, the expectations expire.
		//   - In this case it continues without setting expectations till it needs to create/delete controllees.
		logger.V(4).Info("Controller either never recorded expectations, or the ttl expired", "controller", controllerKey)
	}
	// Trigger a sync if we either encountered an error (which shouldn't happen since we're
	// getting from local store) or this controller hasn't established expectations.
	return true
}

// TODO: Extend ExpirationCache to support explicit expiration.
// TODO: Make this possible to disable in tests.
// TODO: Support injection of clock.
func (exp *ControlleeExpectations) isExpired() bool {
	return clock.RealClock{}.Since(exp.timestamp) > ExpectationsTimeout
}

// SetExpectations registers new expectations for the given controller. Forgets existing expectations.
func (r *ControllerExpectations) SetExpectations(logger klog.Logger, controllerKey string, add, del int) error {
	exp := &ControlleeExpectations{add: int64(add), del: int64(del), key: controllerKey, timestamp: clock.RealClock{}.Now()}
	logger.V(4).Info("Setting expectations", "expectations", exp)
	return r.Add(exp)
}

func (r *ControllerExpectations) ExpectCreations(logger klog.Logger, controllerKey string, adds int) error {
	return r.SetExpectations(logger, controllerKey, adds, 0)
}

func (r *ControllerExpectations) ExpectDeletions(logger klog.Logger, controllerKey string, dels int) error {
	return r.SetExpectations(logger, controllerKey, 0, dels)
}

// Decrements the expectation counts of the given controller.
func (r *ControllerExpectations) LowerExpectations(logger klog.Logger, controllerKey string, add, del int) {
	if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
		exp.Add(int64(-add), int64(-del))
		// The expectations might've been modified since the update on the previous line.
		logger.V(4).Info("Lowered expectations", "expectations", exp)
	}
}

// Increments the expectation counts of the given controller.
func (r *ControllerExpectations) RaiseExpectations(logger klog.Logger, controllerKey string, add, del int) {
	if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
		exp.Add(int64(add), int64(del))
		// The expectations might've been modified since the update on the previous line.
		logger.V(4).Info("Raised expectations", "expectations", exp)
	}
}

// CreationObserved atomically decrements the `add` expectation count of the given controller.
func (r *ControllerExpectations) CreationObserved(logger klog.Logger, controllerKey string) {
	r.LowerExpectations(logger, controllerKey, 1, 0)
}

// DeletionObserved atomically decrements the `del` expectation count of the given controller.
func (r *ControllerExpectations) DeletionObserved(logger klog.Logger, controllerKey string) {
	r.LowerExpectations(logger, controllerKey, 0, 1)
}
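// expectationsLifecycleSketch is an illustrative sketch, not part of the original file: the call
// sequence a controller typically follows around a sync. The work queue, informer handlers and
// the actual create calls are assumed and elided.
func expectationsLifecycleSketch(logger klog.Logger, exp ControllerExpectationsInterface, controllerKey string, podsToCreate int) error {
	// Skip managing controllees while a previous round of creates/deletes has not been
	// fully observed (or expired) yet.
	if !exp.SatisfiedExpectations(logger, controllerKey) {
		return nil
	}
	// Record how many creates this sync is about to issue, before issuing them.
	if err := exp.ExpectCreations(logger, controllerKey, podsToCreate); err != nil {
		return err
	}
	// ... issue podsToCreate create calls. The informer's add handler then calls
	// exp.CreationObserved(logger, controllerKey) once per pod it sees, and the controller
	// also lowers expectations for any create call that failed outright, so the next sync
	// is not blocked waiting for pods that will never appear.
	return nil
}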
// ControlleeExpectations track controllee creates/deletes.
type ControlleeExpectations struct {
	// Important: Since these two int64 fields are using sync/atomic, they have to be at the top of the struct due to a bug on 32-bit platforms
	// See: https://golang.org/pkg/sync/atomic/ for more information
	add       int64
	del       int64
	key       string
	timestamp time.Time
}

// Add increments the add and del counters.
func (e *ControlleeExpectations) Add(add, del int64) {
	atomic.AddInt64(&e.add, add)
	atomic.AddInt64(&e.del, del)
}

// Fulfilled returns true if this expectation has been fulfilled.
func (e *ControlleeExpectations) Fulfilled() bool {
	// TODO: think about why this line being atomic doesn't matter
	return atomic.LoadInt64(&e.add) <= 0 && atomic.LoadInt64(&e.del) <= 0
}

// GetExpectations returns the add and del expectations of the controllee.
func (e *ControlleeExpectations) GetExpectations() (int64, int64) {
	return atomic.LoadInt64(&e.add), atomic.LoadInt64(&e.del)
}

// MarshalLog makes a thread-safe copy of the values of the expectations that
// can be used for logging.
func (e *ControlleeExpectations) MarshalLog() interface{} {
	return struct {
		add int64
		del int64
		key string
	}{
		add: atomic.LoadInt64(&e.add),
		del: atomic.LoadInt64(&e.del),
		key: e.key,
	}
}

// NewControllerExpectations returns a store for ControllerExpectations.
func NewControllerExpectations() *ControllerExpectations {
	return &ControllerExpectations{cache.NewStore(ExpKeyFunc)}
}

// UIDSetKeyFunc to parse out the key from a UIDSet.
var UIDSetKeyFunc = func(obj interface{}) (string, error) {
	if u, ok := obj.(*UIDSet); ok {
		return u.key, nil
	}
	return "", fmt.Errorf("could not find key for obj %#v", obj)
}

// UIDSet holds a key and a set of UIDs. Used by the
// UIDTrackingControllerExpectations to remember which UIDs it has seen or is
// still waiting for.
type UIDSet struct {
	sets.String
	key string
}

// UIDTrackingControllerExpectations tracks the UID of the pods it deletes.
// This cache is needed over plain old expectations to safely handle graceful
// deletion. The desired behavior is to treat an update that sets the
// DeletionTimestamp on an object as a delete. To do so consistently, one needs
// to remember the expected deletes so they aren't double counted.
// TODO: Track creates as well (#22599)
type UIDTrackingControllerExpectations struct {
	ControllerExpectationsInterface
	// TODO: There is a much nicer way to do this that involves a single store,
	// a lock per entry, and a ControlleeExpectationsInterface type.
	uidStoreLock sync.Mutex
	// Store used for the UIDs associated with any expectation tracked via the
	// ControllerExpectationsInterface.
	uidStore cache.Store
}

// GetUIDs is a convenience method to avoid exposing the set of expected uids.
// The returned set is not thread safe; all modifications must be made holding
// the uidStoreLock.
func (u *UIDTrackingControllerExpectations) GetUIDs(controllerKey string) sets.String {
	if uid, exists, err := u.uidStore.GetByKey(controllerKey); err == nil && exists {
		return uid.(*UIDSet).String
	}
	return nil
}

// ExpectDeletions records expectations for the given deleteKeys, against the given controller.
func (u *UIDTrackingControllerExpectations) ExpectDeletions(logger klog.Logger, rcKey string, deletedKeys []string) error {
	expectedUIDs := sets.NewString()
	for _, k := range deletedKeys {
		expectedUIDs.Insert(k)
	}
	logger.V(4).Info("Controller waiting on deletions", "controller", rcKey, "keys", deletedKeys)
	u.uidStoreLock.Lock()
	defer u.uidStoreLock.Unlock()

	if existing := u.GetUIDs(rcKey); existing != nil && existing.Len() != 0 {
		logger.Error(nil, "Clobbering existing delete keys", "keys", existing)
	}
	if err := u.uidStore.Add(&UIDSet{expectedUIDs, rcKey}); err != nil {
		return err
	}
	return u.ControllerExpectationsInterface.ExpectDeletions(logger, rcKey, expectedUIDs.Len())
}

// DeletionObserved records the given deleteKey as a deletion, for the given rc.
func (u *UIDTrackingControllerExpectations) DeletionObserved(logger klog.Logger, rcKey, deleteKey string) {
	u.uidStoreLock.Lock()
	defer u.uidStoreLock.Unlock()

	uids := u.GetUIDs(rcKey)
	if uids != nil && uids.Has(deleteKey) {
		logger.V(4).Info("Controller received delete for pod", "controller", rcKey, "key", deleteKey)
		u.ControllerExpectationsInterface.DeletionObserved(logger, rcKey)
		uids.Delete(deleteKey)
	}
}

// DeleteExpectations deletes the UID set and invokes DeleteExpectations on the
// underlying ControllerExpectationsInterface.
func (u *UIDTrackingControllerExpectations) DeleteExpectations(logger klog.Logger, rcKey string) {
	u.uidStoreLock.Lock()
	defer u.uidStoreLock.Unlock()

	u.ControllerExpectationsInterface.DeleteExpectations(logger, rcKey)
	if uidExp, exists, err := u.uidStore.GetByKey(rcKey); err == nil && exists {
		if err := u.uidStore.Delete(uidExp); err != nil {
			logger.V(2).Info("Error deleting uid expectations", "controller", rcKey, "err", err)
		}
	}
}

// NewUIDTrackingControllerExpectations returns a wrapper around
// ControllerExpectations that is aware of deleteKeys.
func NewUIDTrackingControllerExpectations(ce ControllerExpectationsInterface) *UIDTrackingControllerExpectations {
	return &UIDTrackingControllerExpectations{ControllerExpectationsInterface: ce, uidStore: cache.NewStore(UIDSetKeyFunc)}
}
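// uidTrackingDeletionsSketch is an illustrative sketch, not part of the original file: how a
// controller might record graceful deletions keyed by pod, so that an update that merely sets
// the DeletionTimestamp is counted once and only once. Assumes uidExp was built with
// NewUIDTrackingControllerExpectations and rsKey is the controller's cache key.
func uidTrackingDeletionsSketch(logger klog.Logger, uidExp *UIDTrackingControllerExpectations, rsKey string, podsToDelete []*v1.Pod) error {
	keys := make([]string, 0, len(podsToDelete))
	for _, p := range podsToDelete {
		// PodKey (defined later in this file) gives the namespace/name key used as the delete key.
		keys = append(keys, PodKey(p))
	}
	// Record the expected deletions before issuing the delete calls.
	if err := uidExp.ExpectDeletions(logger, rsKey, keys); err != nil {
		return err
	}
	// ... issue the delete calls. The pod event handlers then call
	// uidExp.DeletionObserved(logger, rsKey, PodKey(pod)) for each deletion they observe,
	// including updates that only set the DeletionTimestamp.
	return nil
}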
// Reasons for pod events
const (
	// FailedCreatePodReason is added in an event and in a replica set condition
	// when a pod for a replica set fails to be created.
	FailedCreatePodReason = "FailedCreate"
	// SuccessfulCreatePodReason is added in an event when a pod for a replica set
	// is successfully created.
	SuccessfulCreatePodReason = "SuccessfulCreate"
	// FailedDeletePodReason is added in an event and in a replica set condition
	// when a pod for a replica set fails to be deleted.
	FailedDeletePodReason = "FailedDelete"
	// SuccessfulDeletePodReason is added in an event when a pod for a replica set
	// is successfully deleted.
	SuccessfulDeletePodReason = "SuccessfulDelete"
)

// RSControlInterface is an interface that knows how to add or delete
// ReplicaSets, as well as increment or decrement them. It is used
// by the deployment controller to ease testing of actions that it takes.
type RSControlInterface interface {
	PatchReplicaSet(ctx context.Context, namespace, name string, data []byte) error
}

// RealRSControl is the default implementation of RSControlInterface.
type RealRSControl struct {
	KubeClient clientset.Interface
	Recorder   record.EventRecorder
}

var _ RSControlInterface = &RealRSControl{}

func (r RealRSControl) PatchReplicaSet(ctx context.Context, namespace, name string, data []byte) error {
	_, err := r.KubeClient.AppsV1().ReplicaSets(namespace).Patch(ctx, name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
	return err
}

// TODO: merge the controller revision interface in controller_history.go with this one
// ControllerRevisionControlInterface is an interface that knows how to patch
// ControllerRevisions, as well as increment or decrement them. It is used
// by the daemonset controller to ease testing of actions that it takes.
type ControllerRevisionControlInterface interface {
	PatchControllerRevision(ctx context.Context, namespace, name string, data []byte) error
}

// RealControllerRevisionControl is the default implementation of ControllerRevisionControlInterface.
type RealControllerRevisionControl struct {
	KubeClient clientset.Interface
}

var _ ControllerRevisionControlInterface = &RealControllerRevisionControl{}

func (r RealControllerRevisionControl) PatchControllerRevision(ctx context.Context, namespace, name string, data []byte) error {
	_, err := r.KubeClient.AppsV1().ControllerRevisions(namespace).Patch(ctx, name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
	return err
}

// PodControlInterface is an interface that knows how to add or delete pods;
// it exists as an interface to allow testing.
type PodControlInterface interface {
	// CreatePods creates new pods according to the spec, and sets object as the pod's controller.
	CreatePods(ctx context.Context, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error
	// CreatePodsWithGenerateName creates new pods according to the spec, sets object as the pod's controller and sets pod's generateName.
	CreatePodsWithGenerateName(ctx context.Context, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference, generateName string) error
	// DeletePod deletes the pod identified by podID.
	DeletePod(ctx context.Context, namespace string, podID string, object runtime.Object) error
	// PatchPod patches the pod.
	PatchPod(ctx context.Context, namespace, name string, data []byte) error
}
// RealPodControl is the default implementation of PodControlInterface.
type RealPodControl struct {
	KubeClient clientset.Interface
	Recorder   record.EventRecorder
}

var _ PodControlInterface = &RealPodControl{}

func getPodsLabelSet(template *v1.PodTemplateSpec) labels.Set {
	desiredLabels := make(labels.Set)
	for k, v := range template.Labels {
		desiredLabels[k] = v
	}
	return desiredLabels
}

func getPodsFinalizers(template *v1.PodTemplateSpec) []string {
	desiredFinalizers := make([]string, len(template.Finalizers))
	copy(desiredFinalizers, template.Finalizers)
	return desiredFinalizers
}

func getPodsAnnotationSet(template *v1.PodTemplateSpec) labels.Set {
	desiredAnnotations := make(labels.Set)
	for k, v := range template.Annotations {
		desiredAnnotations[k] = v
	}
	return desiredAnnotations
}

func getPodsPrefix(controllerName string) string {
	// use the dash (if the name isn't too long) to make the pod name a bit prettier
	prefix := fmt.Sprintf("%s-", controllerName)
	if len(validation.ValidatePodName(prefix, true)) != 0 {
		prefix = controllerName
	}
	return prefix
}

func validateControllerRef(controllerRef *metav1.OwnerReference) error {
	if controllerRef == nil {
		return fmt.Errorf("controllerRef is nil")
	}
	if len(controllerRef.APIVersion) == 0 {
		return fmt.Errorf("controllerRef has empty APIVersion")
	}
	if len(controllerRef.Kind) == 0 {
		return fmt.Errorf("controllerRef has empty Kind")
	}
	if controllerRef.Controller == nil || !*controllerRef.Controller {
		return fmt.Errorf("controllerRef.Controller is not set to true")
	}
	if controllerRef.BlockOwnerDeletion == nil || !*controllerRef.BlockOwnerDeletion {
		return fmt.Errorf("controllerRef.BlockOwnerDeletion is not set")
	}
	return nil
}

func (r RealPodControl) CreatePods(ctx context.Context, namespace string, template *v1.PodTemplateSpec, controllerObject runtime.Object, controllerRef *metav1.OwnerReference) error {
	return r.CreatePodsWithGenerateName(ctx, namespace, template, controllerObject, controllerRef, "")
}

func (r RealPodControl) CreatePodsWithGenerateName(ctx context.Context, namespace string, template *v1.PodTemplateSpec, controllerObject runtime.Object, controllerRef *metav1.OwnerReference, generateName string) error {
	if err := validateControllerRef(controllerRef); err != nil {
		return err
	}
	pod, err := GetPodFromTemplate(template, controllerObject, controllerRef)
	if err != nil {
		return err
	}
	if len(generateName) > 0 {
		pod.ObjectMeta.GenerateName = generateName
	}
	return r.createPods(ctx, namespace, pod, controllerObject)
}

func (r RealPodControl) PatchPod(ctx context.Context, namespace, name string, data []byte) error {
	_, err := r.KubeClient.CoreV1().Pods(namespace).Patch(ctx, name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
	return err
}

func GetPodFromTemplate(template *v1.PodTemplateSpec, parentObject runtime.Object, controllerRef *metav1.OwnerReference) (*v1.Pod, error) {
	desiredLabels := getPodsLabelSet(template)
	desiredFinalizers := getPodsFinalizers(template)
	desiredAnnotations := getPodsAnnotationSet(template)
	accessor, err := meta.Accessor(parentObject)
	if err != nil {
		return nil, fmt.Errorf("parentObject does not have ObjectMeta, %v", err)
	}
	prefix := getPodsPrefix(accessor.GetName())

	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Labels:       desiredLabels,
			Annotations:  desiredAnnotations,
			GenerateName: prefix,
			Finalizers:   desiredFinalizers,
		},
	}
	if controllerRef != nil {
		pod.OwnerReferences = append(pod.OwnerReferences, *controllerRef)
	}
	pod.Spec = *template.Spec.DeepCopy()
	return pod, nil
}

func (r RealPodControl) createPods(ctx context.Context, namespace string, pod *v1.Pod, object runtime.Object) error {
	if len(labels.Set(pod.Labels)) == 0 {
		return fmt.Errorf("unable to create pods, no labels")
	}
	newPod, err := r.KubeClient.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
	if err != nil {
		// only send an event if the namespace isn't terminating
		if !apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
			r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreatePodReason, "Error creating: %v", err)
		}
		return err
	}
	logger := klog.FromContext(ctx)
	accessor, err := meta.Accessor(object)
	if err != nil {
		logger.Error(err, "parentObject does not have ObjectMeta")
		return nil
	}
	logger.V(4).Info("Controller created pod", "controller", accessor.GetName(), "pod", klog.KObj(newPod))
	r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulCreatePodReason, "Created pod: %v", newPod.Name)

	return nil
}

func (r RealPodControl) DeletePod(ctx context.Context, namespace string, podID string, object runtime.Object) error {
	accessor, err := meta.Accessor(object)
	if err != nil {
		return fmt.Errorf("object does not have ObjectMeta, %v", err)
	}
	logger := klog.FromContext(ctx)
	logger.V(2).Info("Deleting pod", "controller", accessor.GetName(), "pod", klog.KRef(namespace, podID))
	if err := r.KubeClient.CoreV1().Pods(namespace).Delete(ctx, podID, metav1.DeleteOptions{}); err != nil {
		if apierrors.IsNotFound(err) {
			logger.V(4).Info("Pod has already been deleted.", "pod", klog.KRef(namespace, podID))
			return err
		}
		r.Recorder.Eventf(object, v1.EventTypeWarning, FailedDeletePodReason, "Error deleting: %v", err)
		return fmt.Errorf("unable to delete pods: %v", err)
	}
	r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulDeletePodReason, "Deleted pod: %v", podID)

	return nil
}
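// podFromTemplateSketch is an illustrative sketch, not part of the original file: building a
// controllerRef that passes validateControllerRef and turning a template into a pod.
// metav1.NewControllerRef sets both Controller and BlockOwnerDeletion to true, which the
// CreatePods path requires. The ReplicaSet name, namespace, labels and image are hypothetical.
func podFromTemplateSketch() (*v1.Pod, error) {
	rs := &apps.ReplicaSet{ObjectMeta: metav1.ObjectMeta{Name: "frontend", Namespace: "default"}}
	rs.Spec.Template = v1.PodTemplateSpec{
		ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "frontend"}},
		Spec:       v1.PodSpec{Containers: []v1.Container{{Name: "web", Image: "example.com/web:latest"}}},
	}
	controllerRef := metav1.NewControllerRef(rs, apps.SchemeGroupVersion.WithKind("ReplicaSet"))
	// The returned pod copies the template's labels, annotations and finalizers, carries the
	// owner reference, and gets GenerateName "frontend-" so the API server picks a unique name.
	return GetPodFromTemplate(&rs.Spec.Template, rs, controllerRef)
}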
type FakePodControl struct {
	sync.Mutex
	Templates       []v1.PodTemplateSpec
	ControllerRefs  []metav1.OwnerReference
	DeletePodName   []string
	Patches         [][]byte
	Err             error
	CreateLimit     int
	CreateCallCount int
}

var _ PodControlInterface = &FakePodControl{}

func (f *FakePodControl) PatchPod(ctx context.Context, namespace, name string, data []byte) error {
	f.Lock()
	defer f.Unlock()
	f.Patches = append(f.Patches, data)
	if f.Err != nil {
		return f.Err
	}
	return nil
}

func (f *FakePodControl) CreatePods(ctx context.Context, namespace string, spec *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error {
	return f.CreatePodsWithGenerateName(ctx, namespace, spec, object, controllerRef, "")
}

func (f *FakePodControl) CreatePodsWithGenerateName(ctx context.Context, namespace string, spec *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference, generateNamePrefix string) error {
	f.Lock()
	defer f.Unlock()
	f.CreateCallCount++
	if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit {
		return fmt.Errorf("not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount)
	}
	spec.GenerateName = generateNamePrefix
	f.Templates = append(f.Templates, *spec)
	f.ControllerRefs = append(f.ControllerRefs, *controllerRef)
	if f.Err != nil {
		return f.Err
	}
	return nil
}

func (f *FakePodControl) DeletePod(ctx context.Context, namespace string, podID string, object runtime.Object) error {
	f.Lock()
	defer f.Unlock()
	f.DeletePodName = append(f.DeletePodName, podID)
	if f.Err != nil {
		return f.Err
	}
	return nil
}

func (f *FakePodControl) Clear() {
	f.Lock()
	defer f.Unlock()
	f.DeletePodName = []string{}
	f.Templates = []v1.PodTemplateSpec{}
	f.ControllerRefs = []metav1.OwnerReference{}
	f.Patches = [][]byte{}
	f.CreateLimit = 0
	f.CreateCallCount = 0
}

// ByLogging allows custom sorting of pods so the best one can be picked for getting its logs.
type ByLogging []*v1.Pod

func (s ByLogging) Len() int      { return len(s) }
func (s ByLogging) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

func (s ByLogging) Less(i, j int) bool {
	// 1. assigned < unassigned
	if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) {
		return len(s[i].Spec.NodeName) > 0
	}
	// 2. PodRunning < PodUnknown < PodPending
	if s[i].Status.Phase != s[j].Status.Phase {
		return podPhaseToOrdinal[s[i].Status.Phase] > podPhaseToOrdinal[s[j].Status.Phase]
	}
	// 3. ready < not ready
	if podutil.IsPodReady(s[i]) != podutil.IsPodReady(s[j]) {
		return podutil.IsPodReady(s[i])
	}
	// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
	// see https://github.com/kubernetes/kubernetes/issues/22065
	// 4. Been ready for more time < less time < empty time
	if podutil.IsPodReady(s[i]) && podutil.IsPodReady(s[j]) {
		readyTime1 := podReadyTime(s[i])
		readyTime2 := podReadyTime(s[j])
		if !readyTime1.Equal(readyTime2) {
			return afterOrZero(readyTime2, readyTime1)
		}
	}
	// 5. Pods with containers with higher restart counts < lower restart counts
	if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) {
		return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j])
	}
	// 6. older pods < newer pods < empty timestamp pods
	if !s[i].CreationTimestamp.Equal(&s[j].CreationTimestamp) {
		return afterOrZero(&s[j].CreationTimestamp, &s[i].CreationTimestamp)
	}
	return false
}

// ActivePods type allows custom sorting of pods so a controller can pick the best ones to delete.
type ActivePods []*v1.Pod

func (s ActivePods) Len() int      { return len(s) }
func (s ActivePods) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

func (s ActivePods) Less(i, j int) bool {
	// 1. Unassigned < assigned
	// If only one of the pods is unassigned, the unassigned one is smaller
	if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) {
		return len(s[i].Spec.NodeName) == 0
	}
	// 2. PodPending < PodUnknown < PodRunning
	if podPhaseToOrdinal[s[i].Status.Phase] != podPhaseToOrdinal[s[j].Status.Phase] {
		return podPhaseToOrdinal[s[i].Status.Phase] < podPhaseToOrdinal[s[j].Status.Phase]
	}
	// 3. Not ready < ready
	// If only one of the pods is not ready, the not ready one is smaller
	if podutil.IsPodReady(s[i]) != podutil.IsPodReady(s[j]) {
		return !podutil.IsPodReady(s[i])
	}
	// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
	// see https://github.com/kubernetes/kubernetes/issues/22065
	// 4. Been ready for empty time < less time < more time
	// If both pods are ready, the latest ready one is smaller
	if podutil.IsPodReady(s[i]) && podutil.IsPodReady(s[j]) {
		readyTime1 := podReadyTime(s[i])
		readyTime2 := podReadyTime(s[j])
		if !readyTime1.Equal(readyTime2) {
			return afterOrZero(readyTime1, readyTime2)
		}
	}
	// 5. Pods with containers with higher restart counts < lower restart counts
	if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) {
		return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j])
	}
	// 6. Empty creation time pods < newer pods < older pods
	if !s[i].CreationTimestamp.Equal(&s[j].CreationTimestamp) {
		return afterOrZero(&s[i].CreationTimestamp, &s[j].CreationTimestamp)
	}
	return false
}
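// activePodsOrderSketch is an illustrative sketch, not part of the original file: under the
// ActivePods rules above, an unassigned pending pod sorts before an assigned running one, i.e.
// it is the preferred deletion candidate. The node name is hypothetical.
func activePodsOrderSketch() bool {
	pending := &v1.Pod{Status: v1.PodStatus{Phase: v1.PodPending}}
	running := &v1.Pod{
		Spec:   v1.PodSpec{NodeName: "node-1"},
		Status: v1.PodStatus{Phase: v1.PodRunning},
	}
	// Rule 1 (unassigned < assigned) already decides this comparison, so it returns true.
	return ActivePods{pending, running}.Less(0, 1)
}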
// ActivePodsWithRanks is a sortable list of pods and a list of corresponding
// ranks which will be considered during sorting. The two lists must have equal
// length. After sorting, the pods will be ordered as follows, applying each
// rule in turn until one matches:
//
//  1. If only one of the pods is assigned to a node, the pod that is not
//     assigned comes before the pod that is.
//  2. If the pods' phases differ, a pending pod comes before a pod whose phase
//     is unknown, and a pod whose phase is unknown comes before a running pod.
//  3. If exactly one of the pods is ready, the pod that is not ready comes
//     before the ready pod.
//  4. If controller.kubernetes.io/pod-deletion-cost annotation is set, then
//     the pod with the lower value will come first.
//  5. If the pods' ranks differ, the pod with greater rank comes before the pod
//     with lower rank.
//  6. If both pods are ready but have not been ready for the same amount of
//     time, the pod that has been ready for a shorter amount of time comes
//     before the pod that has been ready for longer.
//  7. If one pod has a container that has restarted more than any container in
//     the other pod, the pod with the container with more restarts comes
//     before the other pod.
//  8. If the pods' creation times differ, the pod that was created more recently
//     comes before the older pod.
//
// In 6 and 8, times are compared in a logarithmic scale. This allows a level
// of randomness among equivalent Pods when sorting. If two pods have the same
// logarithmic rank, they are sorted by UUID to provide a pseudorandom order.
//
// If none of these rules matches, the second pod comes before the first pod.
//
// The intention of this ordering is to put pods that should be preferred for
// deletion first in the list.
type ActivePodsWithRanks struct {
	// Pods is a list of pods.
	Pods []*v1.Pod

	// Rank is a ranking of pods. This ranking is used during sorting when
	// comparing two pods that are both scheduled, in the same phase, and
	// having the same ready status.
	Rank []int

	// Now is a reference timestamp for doing logarithmic timestamp comparisons.
	// If zero, comparison happens without scaling.
	Now metav1.Time
}

func (s ActivePodsWithRanks) Len() int {
	return len(s.Pods)
}

func (s ActivePodsWithRanks) Swap(i, j int) {
	s.Pods[i], s.Pods[j] = s.Pods[j], s.Pods[i]
	s.Rank[i], s.Rank[j] = s.Rank[j], s.Rank[i]
}

// Less compares two pods with corresponding ranks and returns true if the first
// one should be preferred for deletion.
func (s ActivePodsWithRanks) Less(i, j int) bool {
	// 1. Unassigned < assigned
	// If only one of the pods is unassigned, the unassigned one is smaller
	if s.Pods[i].Spec.NodeName != s.Pods[j].Spec.NodeName && (len(s.Pods[i].Spec.NodeName) == 0 || len(s.Pods[j].Spec.NodeName) == 0) {
		return len(s.Pods[i].Spec.NodeName) == 0
	}
	// 2. PodPending < PodUnknown < PodRunning
	if podPhaseToOrdinal[s.Pods[i].Status.Phase] != podPhaseToOrdinal[s.Pods[j].Status.Phase] {
		return podPhaseToOrdinal[s.Pods[i].Status.Phase] < podPhaseToOrdinal[s.Pods[j].Status.Phase]
	}
	// 3. Not ready < ready
	// If only one of the pods is not ready, the not ready one is smaller
	if podutil.IsPodReady(s.Pods[i]) != podutil.IsPodReady(s.Pods[j]) {
		return !podutil.IsPodReady(s.Pods[i])
	}

	// 4. lower pod-deletion-cost < higher pod-deletion cost
	if utilfeature.DefaultFeatureGate.Enabled(features.PodDeletionCost) {
		pi, _ := helper.GetDeletionCostFromPodAnnotations(s.Pods[i].Annotations)
		pj, _ := helper.GetDeletionCostFromPodAnnotations(s.Pods[j].Annotations)
		if pi != pj {
			return pi < pj
		}
	}

	// 5. Doubled up < not doubled up
	// If one of the two pods is on the same node as one or more additional
	// ready pods that belong to the same replicaset, whichever pod has more
	// colocated ready pods is less
	if s.Rank[i] != s.Rank[j] {
		return s.Rank[i] > s.Rank[j]
	}
	// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
	// see https://github.com/kubernetes/kubernetes/issues/22065
	// 6. Been ready for empty time < less time < more time
	// If both pods are ready, the latest ready one is smaller
	if podutil.IsPodReady(s.Pods[i]) && podutil.IsPodReady(s.Pods[j]) {
		readyTime1 := podReadyTime(s.Pods[i])
		readyTime2 := podReadyTime(s.Pods[j])
		if !readyTime1.Equal(readyTime2) {
			if !utilfeature.DefaultFeatureGate.Enabled(features.LogarithmicScaleDown) {
				return afterOrZero(readyTime1, readyTime2)
			} else {
				if s.Now.IsZero() || readyTime1.IsZero() || readyTime2.IsZero() {
					return afterOrZero(readyTime1, readyTime2)
				}
				rankDiff := logarithmicRankDiff(*readyTime1, *readyTime2, s.Now)
				if rankDiff == 0 {
					return s.Pods[i].UID < s.Pods[j].UID
				}
				return rankDiff < 0
			}
		}
	}
	// 7. Pods with containers with higher restart counts < lower restart counts
	if maxContainerRestarts(s.Pods[i]) != maxContainerRestarts(s.Pods[j]) {
		return maxContainerRestarts(s.Pods[i]) > maxContainerRestarts(s.Pods[j])
	}
	// 8. Empty creation time pods < newer pods < older pods
	if !s.Pods[i].CreationTimestamp.Equal(&s.Pods[j].CreationTimestamp) {
		if !utilfeature.DefaultFeatureGate.Enabled(features.LogarithmicScaleDown) {
			return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
		} else {
			if s.Now.IsZero() || s.Pods[i].CreationTimestamp.IsZero() || s.Pods[j].CreationTimestamp.IsZero() {
				return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
			}
			rankDiff := logarithmicRankDiff(s.Pods[i].CreationTimestamp, s.Pods[j].CreationTimestamp, s.Now)
			if rankDiff == 0 {
				return s.Pods[i].UID < s.Pods[j].UID
			}
			return rankDiff < 0
		}
	}
	return false
}

// afterOrZero checks if time t1 is after time t2; if one of them
// is zero, the zero time is seen as after non-zero time.
func afterOrZero(t1, t2 *metav1.Time) bool {
	if t1.Time.IsZero() || t2.Time.IsZero() {
		return t1.Time.IsZero()
	}
	return t1.After(t2.Time)
}

// logarithmicRankDiff calculates the base-2 logarithmic ranks of 2 timestamps,
// compared to the current timestamp
func logarithmicRankDiff(t1, t2, now metav1.Time) int64 {
	d1 := now.Sub(t1.Time)
	d2 := now.Sub(t2.Time)
	r1 := int64(-1)
	r2 := int64(-1)
	if d1 > 0 {
		r1 = int64(math.Log2(float64(d1)))
	}
	if d2 > 0 {
		r2 = int64(math.Log2(float64(d2)))
	}
	return r1 - r2
}

func podReadyTime(pod *v1.Pod) *metav1.Time {
	if podutil.IsPodReady(pod) {
		for _, c := range pod.Status.Conditions {
			// we only care about pod ready conditions
			if c.Type == v1.PodReady && c.Status == v1.ConditionTrue {
				return &c.LastTransitionTime
			}
		}
	}
	return &metav1.Time{}
}

func maxContainerRestarts(pod *v1.Pod) int {
	maxRestarts := 0
	for _, c := range pod.Status.ContainerStatuses {
		maxRestarts = max(maxRestarts, int(c.RestartCount))
	}
	return maxRestarts
}
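// logarithmicBucketSketch is an illustrative sketch, not part of the original file: with the
// LogarithmicScaleDown gate, two pods whose ages fall into the same base-2 bucket get a rank
// difference of zero from logarithmicRankDiff and are then ordered by UID, which spreads
// deletions pseudorandomly across pods of roughly the same age. The timestamps are hypothetical.
func logarithmicBucketSketch() bool {
	now := metav1.Now()
	t1 := metav1.NewTime(now.Add(-100 * time.Minute))
	t2 := metav1.NewTime(now.Add(-80 * time.Minute))
	// Both ages land in the same log2 bucket, so the rank difference is zero.
	return logarithmicRankDiff(t1, t2, now) == 0
}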
// FilterActivePods returns pods that have not terminated.
func FilterActivePods(logger klog.Logger, pods []*v1.Pod) []*v1.Pod {
	var result []*v1.Pod
	for _, p := range pods {
		if IsPodActive(p) {
			result = append(result, p)
		} else {
			logger.V(4).Info("Ignoring inactive pod", "pod", klog.KObj(p), "phase", p.Status.Phase, "deletionTime", klog.SafePtr(p.DeletionTimestamp))
		}
	}
	return result
}

func FilterTerminatingPods(pods []*v1.Pod) []*v1.Pod {
	var result []*v1.Pod
	for _, p := range pods {
		if IsPodTerminating(p) {
			result = append(result, p)
		}
	}
	return result
}

func CountTerminatingPods(pods []*v1.Pod) int32 {
	numberOfTerminatingPods := 0
	for _, p := range pods {
		if IsPodTerminating(p) {
			numberOfTerminatingPods += 1
		}
	}
	return int32(numberOfTerminatingPods)
}

func IsPodActive(p *v1.Pod) bool {
	return v1.PodSucceeded != p.Status.Phase &&
		v1.PodFailed != p.Status.Phase &&
		p.DeletionTimestamp == nil
}

func IsPodTerminating(p *v1.Pod) bool {
	return !podutil.IsPodTerminal(p) &&
		p.DeletionTimestamp != nil
}

// FilterActiveReplicaSets returns replica sets that have (or at least ought to have) pods.
func FilterActiveReplicaSets(replicaSets []*apps.ReplicaSet) []*apps.ReplicaSet {
	activeFilter := func(rs *apps.ReplicaSet) bool {
		return rs != nil && *(rs.Spec.Replicas) > 0
	}
	return FilterReplicaSets(replicaSets, activeFilter)
}

type filterRS func(rs *apps.ReplicaSet) bool

// FilterReplicaSets returns replica sets that are filtered by filterFn (all returned ones should match filterFn).
func FilterReplicaSets(RSes []*apps.ReplicaSet, filterFn filterRS) []*apps.ReplicaSet {
	var filtered []*apps.ReplicaSet
	for i := range RSes {
		if filterFn(RSes[i]) {
			filtered = append(filtered, RSes[i])
		}
	}
	return filtered
}

// PodKey returns a key unique to the given pod within a cluster.
// It's used so we consistently use the same key scheme in this module.
// It does exactly what cache.MetaNamespaceKeyFunc would have done
// except there's no possibility of error since we know the exact type.
func PodKey(pod *v1.Pod) string {
	return fmt.Sprintf("%v/%v", pod.Namespace, pod.Name)
}

// ControllersByCreationTimestamp sorts a list of ReplicationControllers by creation timestamp, using their names as a tie breaker.
type ControllersByCreationTimestamp []*v1.ReplicationController

func (o ControllersByCreationTimestamp) Len() int      { return len(o) }
func (o ControllersByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ControllersByCreationTimestamp) Less(i, j int) bool {
	if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) {
		return o[i].Name < o[j].Name
	}
	return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
}

// ReplicaSetsByCreationTimestamp sorts a list of ReplicaSet by creation timestamp, using their names as a tie breaker.
type ReplicaSetsByCreationTimestamp []*apps.ReplicaSet

func (o ReplicaSetsByCreationTimestamp) Len() int      { return len(o) }
func (o ReplicaSetsByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ReplicaSetsByCreationTimestamp) Less(i, j int) bool {
	if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) {
		return o[i].Name < o[j].Name
	}
	return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
}

// ReplicaSetsBySizeOlder sorts a list of ReplicaSet by size in descending order, using their creation timestamp or name as a tie breaker.
// By using the creation timestamp, this sorts from old to new replica sets.
type ReplicaSetsBySizeOlder []*apps.ReplicaSet

func (o ReplicaSetsBySizeOlder) Len() int      { return len(o) }
func (o ReplicaSetsBySizeOlder) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ReplicaSetsBySizeOlder) Less(i, j int) bool {
	if *(o[i].Spec.Replicas) == *(o[j].Spec.Replicas) {
		return ReplicaSetsByCreationTimestamp(o).Less(i, j)
	}
	return *(o[i].Spec.Replicas) > *(o[j].Spec.Replicas)
}

// ReplicaSetsBySizeNewer sorts a list of ReplicaSet by size in descending order, using their creation timestamp or name as a tie breaker.
// By using the creation timestamp, this sorts from new to old replica sets.
type ReplicaSetsBySizeNewer []*apps.ReplicaSet

func (o ReplicaSetsBySizeNewer) Len() int      { return len(o) }
func (o ReplicaSetsBySizeNewer) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ReplicaSetsBySizeNewer) Less(i, j int) bool {
	if *(o[i].Spec.Replicas) == *(o[j].Spec.Replicas) {
		return ReplicaSetsByCreationTimestamp(o).Less(j, i)
	}
	return *(o[i].Spec.Replicas) > *(o[j].Spec.Replicas)
}

// AddOrUpdateTaintOnNode adds taints to the node. If a taint was added to the node, it issues
// API calls to update the node; otherwise, it makes no API calls. Returns an error if any call fails.
func AddOrUpdateTaintOnNode(ctx context.Context, c clientset.Interface, nodeName string, taints ...*v1.Taint) error {
	if len(taints) == 0 {
		return nil
	}
	firstTry := true
	return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error {
		var err error
		var oldNode *v1.Node
		// First we try getting node from the API server cache, as it's cheaper. If it fails
		// we get it from etcd to be sure to have fresh data.
		option := metav1.GetOptions{}
		if firstTry {
			option.ResourceVersion = "0"
			firstTry = false
		}
		oldNode, err = c.CoreV1().Nodes().Get(ctx, nodeName, option)
		if err != nil {
			return err
		}

		var newNode *v1.Node
		oldNodeCopy := oldNode
		updated := false
		for _, taint := range taints {
			curNewNode, ok, err := taintutils.AddOrUpdateTaint(oldNodeCopy, taint)
			if err != nil {
				return fmt.Errorf("failed to update taint of node")
			}
			updated = updated || ok
			newNode = curNewNode
			oldNodeCopy = curNewNode
		}
		if !updated {
			return nil
		}
		return PatchNodeTaints(ctx, c, nodeName, oldNode, newNode)
	})
}
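// taintRoundTripSketch is an illustrative sketch, not part of the original file: adding and later
// removing a temporary NoSchedule taint with the taint helpers in this file. The taint key and
// the decision to skip the local node check are hypothetical.
func taintRoundTripSketch(ctx context.Context, c clientset.Interface, nodeName string) error {
	taint := &v1.Taint{Key: "example.com/maintenance", Effect: v1.TaintEffectNoSchedule}
	if err := AddOrUpdateTaintOnNode(ctx, c, nodeName, taint); err != nil {
		return err
	}
	// Passing a nil *v1.Node skips the local short-circuit check and always issues the Get.
	return RemoveTaintOffNode(ctx, c, nodeName, nil, taint)
}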
// RemoveTaintOffNode is for cleaning up taints temporarily added to a node; it
// won't fail if the target taint doesn't exist or has already been removed.
// If passed a node, it checks whether there is anything to be done; if the taint
// is not present, it issues no API calls.
func RemoveTaintOffNode(ctx context.Context, c clientset.Interface, nodeName string, node *v1.Node, taints ...*v1.Taint) error {
	if len(taints) == 0 {
		return nil
	}
	// Short circuit for limiting amount of API calls.
	if node != nil {
		match := false
		for _, taint := range taints {
			if taintutils.TaintExists(node.Spec.Taints, taint) {
				match = true
				break
			}
		}
		if !match {
			return nil
		}
	}

	firstTry := true
	return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error {
		var err error
		var oldNode *v1.Node
		// First we try getting node from the API server cache, as it's cheaper. If it fails
		// we get it from etcd to be sure to have fresh data.
		option := metav1.GetOptions{}
		if firstTry {
			option.ResourceVersion = "0"
			firstTry = false
		}
		oldNode, err = c.CoreV1().Nodes().Get(ctx, nodeName, option)
		if err != nil {
			return err
		}

		var newNode *v1.Node
		oldNodeCopy := oldNode
		updated := false
		for _, taint := range taints {
			curNewNode, ok, err := taintutils.RemoveTaint(oldNodeCopy, taint)
			if err != nil {
				return fmt.Errorf("failed to remove taint of node")
			}
			updated = updated || ok
			newNode = curNewNode
			oldNodeCopy = curNewNode
		}
		if !updated {
			return nil
		}
		return PatchNodeTaints(ctx, c, nodeName, oldNode, newNode)
	})
}

// PatchNodeTaints patches node's taints.
func PatchNodeTaints(ctx context.Context, c clientset.Interface, nodeName string, oldNode *v1.Node, newNode *v1.Node) error {
	// Strip base diff node from RV to ensure that our Patch request will set RV to check for conflicts over .spec.taints.
	// This is needed because .spec.taints does not specify patchMergeKey and patchStrategy and adding them is no longer an option for compatibility reasons.
	// Using other Patch strategy works for adding new taints, however will not resolve problem with taint removal.
	oldNodeNoRV := oldNode.DeepCopy()
	oldNodeNoRV.ResourceVersion = ""
	oldDataNoRV, err := json.Marshal(&oldNodeNoRV)
	if err != nil {
		return fmt.Errorf("failed to marshal old node %#v for node %q: %v", oldNodeNoRV, nodeName, err)
	}

	newTaints := newNode.Spec.Taints
	newNodeClone := oldNode.DeepCopy()
	newNodeClone.Spec.Taints = newTaints
	newData, err := json.Marshal(newNodeClone)
	if err != nil {
		return fmt.Errorf("failed to marshal new node %#v for node %q: %v", newNodeClone, nodeName, err)
	}

	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldDataNoRV, newData, v1.Node{})
	if err != nil {
		return fmt.Errorf("failed to create patch for node %q: %v", nodeName, err)
	}

	_, err = c.CoreV1().Nodes().Patch(ctx, nodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{})
	return err
}

// ComputeHash returns a hash value calculated from pod template and
// a collisionCount to avoid hash collision. The hash will be safely encoded to
// avoid bad words.
func ComputeHash(template *v1.PodTemplateSpec, collisionCount *int32) string {
	podTemplateSpecHasher := fnv.New32a()
	hashutil.DeepHashObject(podTemplateSpecHasher, *template)

	// Add collisionCount in the hash if it exists.
	if collisionCount != nil {
		collisionCountBytes := make([]byte, 8)
		binary.LittleEndian.PutUint32(collisionCountBytes, uint32(*collisionCount))
		podTemplateSpecHasher.Write(collisionCountBytes)
	}

	return rand.SafeEncodeString(fmt.Sprint(podTemplateSpecHasher.Sum32()))
}

func AddOrUpdateLabelsOnNode(kubeClient clientset.Interface, nodeName string, labelsToUpdate map[string]string) error {
	firstTry := true
	return clientretry.RetryOnConflict(UpdateLabelBackoff, func() error {
		var err error
		var node *v1.Node
		// First we try getting node from the API server cache, as it's cheaper. If it fails
		// we get it from etcd to be sure to have fresh data.
		option := metav1.GetOptions{}
		if firstTry {
			option.ResourceVersion = "0"
			firstTry = false
		}
		node, err = kubeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, option)
		if err != nil {
			return err
		}

		// Make a copy of the node and update the labels.
		newNode := node.DeepCopy()
		if newNode.Labels == nil {
			newNode.Labels = make(map[string]string)
		}
		for key, value := range labelsToUpdate {
			newNode.Labels[key] = value
		}

		oldData, err := json.Marshal(node)
		if err != nil {
			return fmt.Errorf("failed to marshal the existing node %#v: %v", node, err)
		}
		newData, err := json.Marshal(newNode)
		if err != nil {
			return fmt.Errorf("failed to marshal the new node %#v: %v", newNode, err)
		}
		patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Node{})
		if err != nil {
			return fmt.Errorf("failed to create a two-way merge patch: %v", err)
		}
		if _, err := kubeClient.CoreV1().Nodes().Patch(context.TODO(), node.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}); err != nil {
			return fmt.Errorf("failed to patch the node: %v", err)
		}
		return nil
	})
}
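// computeHashSketch is an illustrative sketch, not part of the original file: ComputeHash is
// deterministic for a given template, and bumping the collision count produces a different,
// equally valid suffix; callers (such as the deployment controller's pod-template-hash label)
// rely on both properties. The template is supplied by the caller and is hypothetical here.
func computeHashSketch(template *v1.PodTemplateSpec) (string, string) {
	withoutCollisions := ComputeHash(template, nil)
	collisionCount := int32(1)
	afterCollision := ComputeHash(template, &collisionCount)
	return withoutCollisions, afterCollision
}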