// k8s.io/kubernetes@v1.29.3/pkg/controller/controller_utils.go

/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"context"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"hash/fnv"
	"math"
	"sync"
	"sync/atomic"
	"time"

	apps "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/rand"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/strategicpatch"
	"k8s.io/apimachinery/pkg/util/wait"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	clientretry "k8s.io/client-go/util/retry"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/pkg/apis/core/helper"
	_ "k8s.io/kubernetes/pkg/apis/core/install"
	"k8s.io/kubernetes/pkg/apis/core/validation"
	"k8s.io/kubernetes/pkg/features"
	hashutil "k8s.io/kubernetes/pkg/util/hash"
	taintutils "k8s.io/kubernetes/pkg/util/taints"
	"k8s.io/utils/clock"
	"k8s.io/utils/integer"

	"k8s.io/klog/v2"
)

const (
	// If a watch drops a delete event for a pod, it'll take this long
	// before a dormant controller waiting for those events is woken up anyway. It is
	// specifically targeted at the case where some problem prevents an update
	// of expectations, without it the controller could stay asleep forever. This should
	// be set based on the expected latency of watch events.
	//
	// Currently a controller can service (create *and* observe the watch events for said
	// creation) about 10 pods a second, so it takes about 1 min to service
	// 500 pods. Just creation is limited to 20qps, and watching happens with ~10-30s
	// latency/pod at the scale of 3000 pods over 100 nodes.
	ExpectationsTimeout = 5 * time.Minute
	// When batching pod creates, SlowStartInitialBatchSize is the size of the
	// initial batch. The size of each successive batch is twice the size of
	// the previous batch. For example, for a value of 1, batch sizes would be
	// 1, 2, 4, 8, ... and for a value of 10, batch sizes would be
	// 10, 20, 40, 80, ... Setting the value higher means that quota denials
	// will result in more doomed API calls and associated event spam. Setting
	// the value lower will result in more API call round trip periods for
	// large batches.
	//
	// Given a number of pods to start "N":
	// The number of doomed calls per sync once quota is exceeded is given by:
	//      min(N,SlowStartInitialBatchSize)
	// The number of batches is given by:
	//      1+floor(log_2(ceil(N/SlowStartInitialBatchSize)))
	SlowStartInitialBatchSize = 1
)
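// Illustrative sketch (not part of the upstream file): how a controller might
// batch N creations using SlowStartInitialBatchSize. The helper below mirrors
// the slowStartBatch helpers that live in the individual controller packages
// (e.g. the ReplicaSet controller); treat it as a hedged example, not this
// package's API.
func exampleSlowStartBatch(count int, fn func() error) (int, error) {
	remaining := count
	successes := 0
	// Batch sizes grow 1, 2, 4, ... (starting from SlowStartInitialBatchSize),
	// so a systemic failure such as a quota denial dooms at most one small batch.
	for batchSize := integer.IntMin(remaining, SlowStartInitialBatchSize); batchSize > 0; batchSize = integer.IntMin(2*batchSize, remaining) {
		errCh := make(chan error, batchSize)
		var wg sync.WaitGroup
		wg.Add(batchSize)
		for i := 0; i < batchSize; i++ {
			go func() {
				defer wg.Done()
				if err := fn(); err != nil {
					errCh <- err
				}
			}()
		}
		wg.Wait()
		successes += batchSize - len(errCh)
		if len(errCh) > 0 {
			// Stop doubling on the first failed batch and surface one error.
			return successes, <-errCh
		}
		remaining -= batchSize
	}
	return successes, nil
}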
var UpdateTaintBackoff = wait.Backoff{
	Steps:    5,
	Duration: 100 * time.Millisecond,
	Jitter:   1.0,
}

var UpdateLabelBackoff = wait.Backoff{
	Steps:    5,
	Duration: 100 * time.Millisecond,
	Jitter:   1.0,
}

var (
	KeyFunc           = cache.DeletionHandlingMetaNamespaceKeyFunc
	podPhaseToOrdinal = map[v1.PodPhase]int{v1.PodPending: 0, v1.PodUnknown: 1, v1.PodRunning: 2}
)

type ResyncPeriodFunc func() time.Duration

// Returns 0 for resyncPeriod in case resyncing is not needed.
func NoResyncPeriodFunc() time.Duration {
	return 0
}

// StaticResyncPeriodFunc returns the resync period specified
func StaticResyncPeriodFunc(resyncPeriod time.Duration) ResyncPeriodFunc {
	return func() time.Duration {
		return resyncPeriod
	}
}

// Expectations are a way for controllers to tell the controller manager what they expect. eg:
//	ControllerExpectations: {
//		controller1: expects  2 adds in 2 minutes
//		controller2: expects  2 dels in 2 minutes
//		controller3: expects -1 adds in 2 minutes => controller3's expectations have already been met
//	}
//
// Implementation:
//	ControlleeExpectation = pair of atomic counters to track controllee's creation/deletion
//	ControllerExpectationsStore = TTLStore + a ControlleeExpectation per controller
//
// * Once set expectations can only be lowered
// * A controller isn't synced till its expectations are either fulfilled, or expire
// * Controllers that don't set expectations will get woken up for every matching controllee

// ExpKeyFunc to parse out the key from a ControlleeExpectation
var ExpKeyFunc = func(obj interface{}) (string, error) {
	if e, ok := obj.(*ControlleeExpectations); ok {
		return e.key, nil
	}
	return "", fmt.Errorf("could not find key for obj %#v", obj)
}

// ControllerExpectationsInterface is an interface that allows users to set and wait on expectations.
// Only abstracted out for testing.
// Warning: if using KeyFunc it is not safe to use a single ControllerExpectationsInterface with different
// types of controllers, because the keys might conflict across types.
type ControllerExpectationsInterface interface {
	GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error)
	SatisfiedExpectations(logger klog.Logger, controllerKey string) bool
	DeleteExpectations(logger klog.Logger, controllerKey string)
	SetExpectations(logger klog.Logger, controllerKey string, add, del int) error
	ExpectCreations(logger klog.Logger, controllerKey string, adds int) error
	ExpectDeletions(logger klog.Logger, controllerKey string, dels int) error
	CreationObserved(logger klog.Logger, controllerKey string)
	DeletionObserved(logger klog.Logger, controllerKey string)
	RaiseExpectations(logger klog.Logger, controllerKey string, add, del int)
	LowerExpectations(logger klog.Logger, controllerKey string, add, del int)
}

// ControllerExpectations is a cache mapping controllers to what they expect to see before being woken up for a sync.
type ControllerExpectations struct {
	cache.Store
}

// GetExpectations returns the ControlleeExpectations of the given controller.
func (r *ControllerExpectations) GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error) {
	exp, exists, err := r.GetByKey(controllerKey)
	if err == nil && exists {
		return exp.(*ControlleeExpectations), true, nil
	}
	return nil, false, err
}

// DeleteExpectations deletes the expectations of the given controller from the TTLStore.
func (r *ControllerExpectations) DeleteExpectations(logger klog.Logger, controllerKey string) {
	if exp, exists, err := r.GetByKey(controllerKey); err == nil && exists {
		if err := r.Delete(exp); err != nil {
			logger.V(2).Info("Error deleting expectations", "controller", controllerKey, "err", err)
		}
	}
}

// SatisfiedExpectations returns true if the required adds/dels for the given controller have been observed.
// Add/del counts are established by the controller at sync time, and updated as controllees are observed by the controller
// manager.
func (r *ControllerExpectations) SatisfiedExpectations(logger klog.Logger, controllerKey string) bool {
	if exp, exists, err := r.GetExpectations(controllerKey); exists {
		if exp.Fulfilled() {
			logger.V(4).Info("Controller expectations fulfilled", "expectations", exp)
			return true
		} else if exp.isExpired() {
			logger.V(4).Info("Controller expectations expired", "expectations", exp)
			return true
		} else {
			logger.V(4).Info("Controller still waiting on expectations", "expectations", exp)
			return false
		}
	} else if err != nil {
		logger.V(2).Info("Error encountered while checking expectations, forcing sync", "err", err)
	} else {
		// When a new controller is created, it doesn't have expectations.
		// When it doesn't see expected watch events for > TTL, the expectations expire.
		//	- In this case it wakes up, creates/deletes controllees, and sets expectations again.
		// When it has satisfied expectations and no controllees need to be created/destroyed > TTL, the expectations expire.
		//	- In this case it continues without setting expectations till it needs to create/delete controllees.
		logger.V(4).Info("Controller either never recorded expectations, or the ttl expired", "controller", controllerKey)
	}
	// Trigger a sync if we either encountered an error (which shouldn't happen since we're
	// getting from local store) or this controller hasn't established expectations.
	return true
}

// TODO: Extend ExpirationCache to support explicit expiration.
// TODO: Make this possible to disable in tests.
// TODO: Support injection of clock.
func (exp *ControlleeExpectations) isExpired() bool {
	return clock.RealClock{}.Since(exp.timestamp) > ExpectationsTimeout
}

// SetExpectations registers new expectations for the given controller. Forgets existing expectations.
func (r *ControllerExpectations) SetExpectations(logger klog.Logger, controllerKey string, add, del int) error {
	exp := &ControlleeExpectations{add: int64(add), del: int64(del), key: controllerKey, timestamp: clock.RealClock{}.Now()}
	logger.V(4).Info("Setting expectations", "expectations", exp)
	return r.Add(exp)
}

func (r *ControllerExpectations) ExpectCreations(logger klog.Logger, controllerKey string, adds int) error {
	return r.SetExpectations(logger, controllerKey, adds, 0)
}

func (r *ControllerExpectations) ExpectDeletions(logger klog.Logger, controllerKey string, dels int) error {
	return r.SetExpectations(logger, controllerKey, 0, dels)
}

// Decrements the expectation counts of the given controller.
func (r *ControllerExpectations) LowerExpectations(logger klog.Logger, controllerKey string, add, del int) {
	if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
		exp.Add(int64(-add), int64(-del))
		// The expectations might've been modified since the update on the previous line.
		logger.V(4).Info("Lowered expectations", "expectations", exp)
	}
}

// Increments the expectation counts of the given controller.
func (r *ControllerExpectations) RaiseExpectations(logger klog.Logger, controllerKey string, add, del int) {
	if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
		exp.Add(int64(add), int64(del))
		// The expectations might've been modified since the update on the previous line.
		logger.V(4).Info("Raised expectations", "expectations", exp)
	}
}

// CreationObserved atomically decrements the `add` expectation count of the given controller.
func (r *ControllerExpectations) CreationObserved(logger klog.Logger, controllerKey string) {
	r.LowerExpectations(logger, controllerKey, 1, 0)
}

// DeletionObserved atomically decrements the `del` expectation count of the given controller.
func (r *ControllerExpectations) DeletionObserved(logger klog.Logger, controllerKey string) {
	r.LowerExpectations(logger, controllerKey, 0, 1)
}

// ControlleeExpectations track controllee creates/deletes.
type ControlleeExpectations struct {
	// Important: Since these two int64 fields are using sync/atomic, they have to be at the top of the struct due to a bug on 32-bit platforms
	// See: https://golang.org/pkg/sync/atomic/ for more information
	add       int64
	del       int64
	key       string
	timestamp time.Time
}

// Add increments the add and del counters.
func (e *ControlleeExpectations) Add(add, del int64) {
	atomic.AddInt64(&e.add, add)
	atomic.AddInt64(&e.del, del)
}

// Fulfilled returns true if this expectation has been fulfilled.
func (e *ControlleeExpectations) Fulfilled() bool {
	// TODO: think about why this line being atomic doesn't matter
	return atomic.LoadInt64(&e.add) <= 0 && atomic.LoadInt64(&e.del) <= 0
}

// GetExpectations returns the add and del expectations of the controllee.
func (e *ControlleeExpectations) GetExpectations() (int64, int64) {
	return atomic.LoadInt64(&e.add), atomic.LoadInt64(&e.del)
}

// MarshalLog makes a thread-safe copy of the values of the expectations that
// can be used for logging.
func (e *ControlleeExpectations) MarshalLog() interface{} {
	return struct {
		add int64
		del int64
		key string
	}{
		add: atomic.LoadInt64(&e.add),
		del: atomic.LoadInt64(&e.del),
		key: e.key,
	}
}

// NewControllerExpectations returns a store for ControllerExpectations.
func NewControllerExpectations() *ControllerExpectations {
	return &ControllerExpectations{cache.NewStore(ExpKeyFunc)}
}
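// Illustrative sketch (not part of the upstream file): the expectations
// lifecycle as a controller's sync handler typically drives it. The
// controllerKey and podsToCreate values are assumed to come from the caller.
func exampleExpectationsUsage(ctx context.Context, exp *ControllerExpectations, controllerKey string, podsToCreate int) error {
	logger := klog.FromContext(ctx)
	// 1. Only act when all previously expected creates/deletes were observed
	// (or the expectations expired after ExpectationsTimeout).
	if !exp.SatisfiedExpectations(logger, controllerKey) {
		return nil
	}
	// 2. Record the creations we are about to issue *before* issuing them, so
	// the shared-informer event handlers can count them down.
	if err := exp.ExpectCreations(logger, controllerKey, podsToCreate); err != nil {
		return err
	}
	// 3. ... issue the creates. On each create that fails permanently, call
	// exp.CreationObserved(logger, controllerKey), because the watch event
	// that would otherwise lower the expectation will never arrive.
	return nil
}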
// UIDSetKeyFunc to parse out the key from a UIDSet.
var UIDSetKeyFunc = func(obj interface{}) (string, error) {
	if u, ok := obj.(*UIDSet); ok {
		return u.key, nil
	}
	return "", fmt.Errorf("could not find key for obj %#v", obj)
}

// UIDSet holds a key and a set of UIDs. Used by the
// UIDTrackingControllerExpectations to remember which UID it has seen/still
// waiting for.
type UIDSet struct {
	sets.String
	key string
}

// UIDTrackingControllerExpectations tracks the UID of the pods it deletes.
// This cache is needed over plain old expectations to safely handle graceful
// deletion. The desired behavior is to treat an update that sets the
// DeletionTimestamp on an object as a delete. To do so consistently, one needs
// to remember the expected deletes so they aren't double counted.
// TODO: Track creates as well (#22599)
type UIDTrackingControllerExpectations struct {
	ControllerExpectationsInterface
	// TODO: There is a much nicer way to do this that involves a single store,
	// a lock per entry, and a ControlleeExpectationsInterface type.
	uidStoreLock sync.Mutex
	// Store used for the UIDs associated with any expectation tracked via the
	// ControllerExpectationsInterface.
	uidStore cache.Store
}

// GetUIDs is a convenience method to avoid exposing the set of expected uids.
// The returned set is not thread safe, all modifications must be made holding
// the uidStoreLock.
func (u *UIDTrackingControllerExpectations) GetUIDs(controllerKey string) sets.String {
	if uid, exists, err := u.uidStore.GetByKey(controllerKey); err == nil && exists {
		return uid.(*UIDSet).String
	}
	return nil
}

// ExpectDeletions records expectations for the given deleteKeys, against the given controller.
func (u *UIDTrackingControllerExpectations) ExpectDeletions(logger klog.Logger, rcKey string, deletedKeys []string) error {
	expectedUIDs := sets.NewString()
	for _, k := range deletedKeys {
		expectedUIDs.Insert(k)
	}
	logger.V(4).Info("Controller waiting on deletions", "controller", rcKey, "keys", deletedKeys)
	u.uidStoreLock.Lock()
	defer u.uidStoreLock.Unlock()

	if existing := u.GetUIDs(rcKey); existing != nil && existing.Len() != 0 {
		logger.Error(nil, "Clobbering existing delete keys", "keys", existing)
	}
	if err := u.uidStore.Add(&UIDSet{expectedUIDs, rcKey}); err != nil {
		return err
	}
	return u.ControllerExpectationsInterface.ExpectDeletions(logger, rcKey, expectedUIDs.Len())
}

// DeletionObserved records the given deleteKey as a deletion, for the given rc.
func (u *UIDTrackingControllerExpectations) DeletionObserved(logger klog.Logger, rcKey, deleteKey string) {
	u.uidStoreLock.Lock()
	defer u.uidStoreLock.Unlock()

	uids := u.GetUIDs(rcKey)
	if uids != nil && uids.Has(deleteKey) {
		logger.V(4).Info("Controller received delete for pod", "controller", rcKey, "key", deleteKey)
		u.ControllerExpectationsInterface.DeletionObserved(logger, rcKey)
		uids.Delete(deleteKey)
	}
}

// DeleteExpectations deletes the UID set and invokes DeleteExpectations on the
// underlying ControllerExpectationsInterface.
func (u *UIDTrackingControllerExpectations) DeleteExpectations(logger klog.Logger, rcKey string) {
	u.uidStoreLock.Lock()
	defer u.uidStoreLock.Unlock()

	u.ControllerExpectationsInterface.DeleteExpectations(logger, rcKey)
	if uidExp, exists, err := u.uidStore.GetByKey(rcKey); err == nil && exists {
		if err := u.uidStore.Delete(uidExp); err != nil {
			logger.V(2).Info("Error deleting uid expectations", "controller", rcKey, "err", err)
		}
	}
}

// NewUIDTrackingControllerExpectations returns a wrapper around
// ControllerExpectations that is aware of deleteKeys.
func NewUIDTrackingControllerExpectations(ce ControllerExpectationsInterface) *UIDTrackingControllerExpectations {
	return &UIDTrackingControllerExpectations{ControllerExpectationsInterface: ce, uidStore: cache.NewStore(UIDSetKeyFunc)}
}
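// Illustrative sketch (not part of the upstream file): graceful deletion with
// UID tracking. Recording the precise pod keys being deleted lets an update
// that merely sets DeletionTimestamp be counted as the expected delete exactly
// once. PodKey is this package's helper; rcKey is assumed to be the caller's.
func exampleUIDTrackingUsage(logger klog.Logger, uexp *UIDTrackingControllerExpectations, rcKey string, podsToDelete []*v1.Pod) error {
	deletedKeys := make([]string, 0, len(podsToDelete))
	for _, pod := range podsToDelete {
		deletedKeys = append(deletedKeys, PodKey(pod))
	}
	if err := uexp.ExpectDeletions(logger, rcKey, deletedKeys); err != nil {
		return err
	}
	// ... issue the deletes. The pod update/delete event handlers then call
	// uexp.DeletionObserved(logger, rcKey, PodKey(pod)) for each observed key,
	// and ignore keys they were not waiting for.
	return nil
}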
// Reasons for pod events
const (
	// FailedCreatePodReason is added in an event and in a replica set condition
	// when a pod for a replica set is failed to be created.
	FailedCreatePodReason = "FailedCreate"
	// SuccessfulCreatePodReason is added in an event when a pod for a replica set
	// is successfully created.
	SuccessfulCreatePodReason = "SuccessfulCreate"
	// FailedDeletePodReason is added in an event and in a replica set condition
	// when a pod for a replica set is failed to be deleted.
	FailedDeletePodReason = "FailedDelete"
	// SuccessfulDeletePodReason is added in an event when a pod for a replica set
	// is successfully deleted.
	SuccessfulDeletePodReason = "SuccessfulDelete"
)

// RSControlInterface is an interface that knows how to add or delete
// ReplicaSets, as well as increment or decrement them. It is used
// by the deployment controller to ease testing of actions that it takes.
type RSControlInterface interface {
	PatchReplicaSet(ctx context.Context, namespace, name string, data []byte) error
}

// RealRSControl is the default implementation of RSControlInterface.
type RealRSControl struct {
	KubeClient clientset.Interface
	Recorder   record.EventRecorder
}

var _ RSControlInterface = &RealRSControl{}

func (r RealRSControl) PatchReplicaSet(ctx context.Context, namespace, name string, data []byte) error {
	_, err := r.KubeClient.AppsV1().ReplicaSets(namespace).Patch(ctx, name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
	return err
}

// TODO: merge the controller revision interface in controller_history.go with this one
// ControllerRevisionControlInterface is an interface that knows how to patch
// ControllerRevisions, as well as increment or decrement them. It is used
// by the daemonset controller to ease testing of actions that it takes.
type ControllerRevisionControlInterface interface {
	PatchControllerRevision(ctx context.Context, namespace, name string, data []byte) error
}

// RealControllerRevisionControl is the default implementation of ControllerRevisionControlInterface.
type RealControllerRevisionControl struct {
	KubeClient clientset.Interface
}

var _ ControllerRevisionControlInterface = &RealControllerRevisionControl{}

func (r RealControllerRevisionControl) PatchControllerRevision(ctx context.Context, namespace, name string, data []byte) error {
	_, err := r.KubeClient.AppsV1().ControllerRevisions(namespace).Patch(ctx, name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
	return err
}

// PodControlInterface is an interface that knows how to add or delete pods,
// created as an interface to allow testing.
type PodControlInterface interface {
	// CreatePods creates new pods according to the spec, and sets object as the pod's controller.
	CreatePods(ctx context.Context, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error
	// CreatePodsWithGenerateName creates new pods according to the spec, sets object as the pod's controller and sets pod's generateName.
	CreatePodsWithGenerateName(ctx context.Context, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference, generateName string) error
	// DeletePod deletes the pod identified by podID.
	DeletePod(ctx context.Context, namespace string, podID string, object runtime.Object) error
	// PatchPod patches the pod.
	PatchPod(ctx context.Context, namespace, name string, data []byte) error
}
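// Illustrative sketch (not part of the upstream file): wiring RealPodControl
// and creating one pod on behalf of a ReplicaSet. metav1.NewControllerRef sets
// Controller and BlockOwnerDeletion, so the ref passes validateControllerRef.
func examplePodControlUsage(ctx context.Context, client clientset.Interface, recorder record.EventRecorder, rs *apps.ReplicaSet) error {
	podControl := RealPodControl{KubeClient: client, Recorder: recorder}
	controllerRef := metav1.NewControllerRef(rs, apps.SchemeGroupVersion.WithKind("ReplicaSet"))
	return podControl.CreatePods(ctx, rs.Namespace, &rs.Spec.Template, rs, controllerRef)
}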
// RealPodControl is the default implementation of PodControlInterface.
type RealPodControl struct {
	KubeClient clientset.Interface
	Recorder   record.EventRecorder
}

var _ PodControlInterface = &RealPodControl{}

func getPodsLabelSet(template *v1.PodTemplateSpec) labels.Set {
	desiredLabels := make(labels.Set)
	for k, v := range template.Labels {
		desiredLabels[k] = v
	}
	return desiredLabels
}

func getPodsFinalizers(template *v1.PodTemplateSpec) []string {
	desiredFinalizers := make([]string, len(template.Finalizers))
	copy(desiredFinalizers, template.Finalizers)
	return desiredFinalizers
}

func getPodsAnnotationSet(template *v1.PodTemplateSpec) labels.Set {
	desiredAnnotations := make(labels.Set)
	for k, v := range template.Annotations {
		desiredAnnotations[k] = v
	}
	return desiredAnnotations
}

func getPodsPrefix(controllerName string) string {
	// use the dash (if the name isn't too long) to make the pod name a bit prettier
	prefix := fmt.Sprintf("%s-", controllerName)
	if len(validation.ValidatePodName(prefix, true)) != 0 {
		prefix = controllerName
	}
	return prefix
}

func validateControllerRef(controllerRef *metav1.OwnerReference) error {
	if controllerRef == nil {
		return fmt.Errorf("controllerRef is nil")
	}
	if len(controllerRef.APIVersion) == 0 {
		return fmt.Errorf("controllerRef has empty APIVersion")
	}
	if len(controllerRef.Kind) == 0 {
		return fmt.Errorf("controllerRef has empty Kind")
	}
	if controllerRef.Controller == nil || !*controllerRef.Controller {
		return fmt.Errorf("controllerRef.Controller is not set to true")
	}
	if controllerRef.BlockOwnerDeletion == nil || !*controllerRef.BlockOwnerDeletion {
		return fmt.Errorf("controllerRef.BlockOwnerDeletion is not set")
	}
	return nil
}
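// Illustrative sketch (not part of the upstream file): the minimal
// OwnerReference shape that validateControllerRef accepts. In real
// controllers, prefer metav1.NewControllerRef, which produces exactly this.
func exampleValidControllerRef(rs *apps.ReplicaSet) *metav1.OwnerReference {
	isController := true
	blockOwnerDeletion := true
	return &metav1.OwnerReference{
		APIVersion:         apps.SchemeGroupVersion.String(), // non-empty APIVersion
		Kind:               "ReplicaSet",                     // non-empty Kind
		Name:               rs.Name,
		UID:                rs.UID,
		Controller:         &isController,       // must be set and true
		BlockOwnerDeletion: &blockOwnerDeletion, // must be set and true
	}
}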
func (r RealPodControl) CreatePods(ctx context.Context, namespace string, template *v1.PodTemplateSpec, controllerObject runtime.Object, controllerRef *metav1.OwnerReference) error {
	return r.CreatePodsWithGenerateName(ctx, namespace, template, controllerObject, controllerRef, "")
}

func (r RealPodControl) CreatePodsWithGenerateName(ctx context.Context, namespace string, template *v1.PodTemplateSpec, controllerObject runtime.Object, controllerRef *metav1.OwnerReference, generateName string) error {
	if err := validateControllerRef(controllerRef); err != nil {
		return err
	}
	pod, err := GetPodFromTemplate(template, controllerObject, controllerRef)
	if err != nil {
		return err
	}
	if len(generateName) > 0 {
		pod.ObjectMeta.GenerateName = generateName
	}
	return r.createPods(ctx, namespace, pod, controllerObject)
}

func (r RealPodControl) PatchPod(ctx context.Context, namespace, name string, data []byte) error {
	_, err := r.KubeClient.CoreV1().Pods(namespace).Patch(ctx, name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
	return err
}

func GetPodFromTemplate(template *v1.PodTemplateSpec, parentObject runtime.Object, controllerRef *metav1.OwnerReference) (*v1.Pod, error) {
	desiredLabels := getPodsLabelSet(template)
	desiredFinalizers := getPodsFinalizers(template)
	desiredAnnotations := getPodsAnnotationSet(template)
	accessor, err := meta.Accessor(parentObject)
	if err != nil {
		return nil, fmt.Errorf("parentObject does not have ObjectMeta, %v", err)
	}
	prefix := getPodsPrefix(accessor.GetName())

	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Labels:       desiredLabels,
			Annotations:  desiredAnnotations,
			GenerateName: prefix,
			Finalizers:   desiredFinalizers,
		},
	}
	if controllerRef != nil {
		pod.OwnerReferences = append(pod.OwnerReferences, *controllerRef)
	}
	pod.Spec = *template.Spec.DeepCopy()
	return pod, nil
}

func (r RealPodControl) createPods(ctx context.Context, namespace string, pod *v1.Pod, object runtime.Object) error {
	if len(labels.Set(pod.Labels)) == 0 {
		return fmt.Errorf("unable to create pods, no labels")
	}
	newPod, err := r.KubeClient.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
	if err != nil {
		// only send an event if the namespace isn't terminating
		if !apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
			r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreatePodReason, "Error creating: %v", err)
		}
		return err
	}
	logger := klog.FromContext(ctx)
	accessor, err := meta.Accessor(object)
	if err != nil {
		logger.Error(err, "parentObject does not have ObjectMeta")
		return nil
	}
	logger.V(4).Info("Controller created pod", "controller", accessor.GetName(), "pod", klog.KObj(newPod))
	r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulCreatePodReason, "Created pod: %v", newPod.Name)

	return nil
}

func (r RealPodControl) DeletePod(ctx context.Context, namespace string, podID string, object runtime.Object) error {
	accessor, err := meta.Accessor(object)
	if err != nil {
		return fmt.Errorf("object does not have ObjectMeta, %v", err)
	}
	logger := klog.FromContext(ctx)
	logger.V(2).Info("Deleting pod", "controller", accessor.GetName(), "pod", klog.KRef(namespace, podID))
	if err := r.KubeClient.CoreV1().Pods(namespace).Delete(ctx, podID, metav1.DeleteOptions{}); err != nil {
		if apierrors.IsNotFound(err) {
			logger.V(4).Info("Pod has already been deleted.", "pod", klog.KRef(namespace, podID))
			return err
		}
		r.Recorder.Eventf(object, v1.EventTypeWarning, FailedDeletePodReason, "Error deleting: %v", err)
		return fmt.Errorf("unable to delete pods: %v", err)
	}
	r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulDeletePodReason, "Deleted pod: %v", podID)

	return nil
}
type FakePodControl struct {
	sync.Mutex
	Templates       []v1.PodTemplateSpec
	ControllerRefs  []metav1.OwnerReference
	DeletePodName   []string
	Patches         [][]byte
	Err             error
	CreateLimit     int
	CreateCallCount int
}

var _ PodControlInterface = &FakePodControl{}

func (f *FakePodControl) PatchPod(ctx context.Context, namespace, name string, data []byte) error {
	f.Lock()
	defer f.Unlock()
	f.Patches = append(f.Patches, data)
	if f.Err != nil {
		return f.Err
	}
	return nil
}

func (f *FakePodControl) CreatePods(ctx context.Context, namespace string, spec *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error {
	return f.CreatePodsWithGenerateName(ctx, namespace, spec, object, controllerRef, "")
}

func (f *FakePodControl) CreatePodsWithGenerateName(ctx context.Context, namespace string, spec *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference, generateNamePrefix string) error {
	f.Lock()
	defer f.Unlock()
	f.CreateCallCount++
	if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit {
		return fmt.Errorf("not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount)
	}
	spec.GenerateName = generateNamePrefix
	f.Templates = append(f.Templates, *spec)
	f.ControllerRefs = append(f.ControllerRefs, *controllerRef)
	if f.Err != nil {
		return f.Err
	}
	return nil
}

func (f *FakePodControl) DeletePod(ctx context.Context, namespace string, podID string, object runtime.Object) error {
	f.Lock()
	defer f.Unlock()
	f.DeletePodName = append(f.DeletePodName, podID)
	if f.Err != nil {
		return f.Err
	}
	return nil
}

func (f *FakePodControl) Clear() {
	f.Lock()
	defer f.Unlock()
	f.DeletePodName = []string{}
	f.Templates = []v1.PodTemplateSpec{}
	f.ControllerRefs = []metav1.OwnerReference{}
	f.Patches = [][]byte{}
	f.CreateLimit = 0
	f.CreateCallCount = 0
}
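// Illustrative sketch (not part of the upstream file): how a unit test can use
// FakePodControl to observe a controller's actions without an API server.
func exampleFakePodControlUsage(ctx context.Context, rs *apps.ReplicaSet) (created, deleted int) {
	fake := &FakePodControl{}
	controllerRef := metav1.NewControllerRef(rs, apps.SchemeGroupVersion.WithKind("ReplicaSet"))
	// Exercise the code under test with the fake; here we call it directly.
	_ = fake.CreatePods(ctx, rs.Namespace, &rs.Spec.Template, rs, controllerRef)
	_ = fake.DeletePod(ctx, rs.Namespace, "doomed-pod", rs)
	// Tests then assert on the recorded Templates/DeletePodName slices.
	return len(fake.Templates), len(fake.DeletePodName)
}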
// ByLogging allows custom sorting of pods so the best one can be picked for getting its logs.
type ByLogging []*v1.Pod

func (s ByLogging) Len() int      { return len(s) }
func (s ByLogging) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

func (s ByLogging) Less(i, j int) bool {
	// 1. assigned < unassigned
	if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) {
		return len(s[i].Spec.NodeName) > 0
	}
	// 2. PodRunning < PodUnknown < PodPending
	if s[i].Status.Phase != s[j].Status.Phase {
		return podPhaseToOrdinal[s[i].Status.Phase] > podPhaseToOrdinal[s[j].Status.Phase]
	}
	// 3. ready < not ready
	if podutil.IsPodReady(s[i]) != podutil.IsPodReady(s[j]) {
		return podutil.IsPodReady(s[i])
	}
	// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
	//       see https://github.com/kubernetes/kubernetes/issues/22065
	// 4. Been ready for more time < less time < empty time
	if podutil.IsPodReady(s[i]) && podutil.IsPodReady(s[j]) {
		readyTime1 := podReadyTime(s[i])
		readyTime2 := podReadyTime(s[j])
		if !readyTime1.Equal(readyTime2) {
			return afterOrZero(readyTime2, readyTime1)
		}
	}
	// 5. Pods with containers with higher restart counts < lower restart counts
	if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) {
		return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j])
	}
	// 6. older pods < newer pods < empty timestamp pods
	if !s[i].CreationTimestamp.Equal(&s[j].CreationTimestamp) {
		return afterOrZero(&s[j].CreationTimestamp, &s[i].CreationTimestamp)
	}
	return false
}

// ActivePods type allows custom sorting of pods so a controller can pick the best ones to delete.
type ActivePods []*v1.Pod

func (s ActivePods) Len() int      { return len(s) }
func (s ActivePods) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

func (s ActivePods) Less(i, j int) bool {
	// 1. Unassigned < assigned
	// If only one of the pods is unassigned, the unassigned one is smaller
	if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) {
		return len(s[i].Spec.NodeName) == 0
	}
	// 2. PodPending < PodUnknown < PodRunning
	if podPhaseToOrdinal[s[i].Status.Phase] != podPhaseToOrdinal[s[j].Status.Phase] {
		return podPhaseToOrdinal[s[i].Status.Phase] < podPhaseToOrdinal[s[j].Status.Phase]
	}
	// 3. Not ready < ready
	// If only one of the pods is not ready, the not ready one is smaller
	if podutil.IsPodReady(s[i]) != podutil.IsPodReady(s[j]) {
		return !podutil.IsPodReady(s[i])
	}
	// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
	//       see https://github.com/kubernetes/kubernetes/issues/22065
	// 4. Been ready for empty time < less time < more time
	// If both pods are ready, the latest ready one is smaller
	if podutil.IsPodReady(s[i]) && podutil.IsPodReady(s[j]) {
		readyTime1 := podReadyTime(s[i])
		readyTime2 := podReadyTime(s[j])
		if !readyTime1.Equal(readyTime2) {
			return afterOrZero(readyTime1, readyTime2)
		}
	}
	// 5. Pods with containers with higher restart counts < lower restart counts
	if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) {
		return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j])
	}
	// 6. Empty creation time pods < newer pods < older pods
	if !s[i].CreationTimestamp.Equal(&s[j].CreationTimestamp) {
		return afterOrZero(&s[i].CreationTimestamp, &s[j].CreationTimestamp)
	}
	return false
}
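// Illustrative sketch (not part of the upstream file): picking scale-down
// victims by sorting ActivePods ascending, which places the pods that are
// cheapest to delete (unassigned, pending, not ready, ...) first. Assumes
// importing "sort" from the standard library.
func exampleActivePodsUsage(pods []*v1.Pod, n int) []*v1.Pod {
	sort.Sort(ActivePods(pods))
	if n > len(pods) {
		n = len(pods)
	}
	return pods[:n] // the n pods to delete first
}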
// ActivePodsWithRanks is a sortable list of pods and a list of corresponding
// ranks which will be considered during sorting. The two lists must have equal
// length. After sorting, the pods will be ordered as follows, applying each
// rule in turn until one matches:
//
//  1. If only one of the pods is assigned to a node, the pod that is not
//     assigned comes before the pod that is.
//  2. If the pods' phases differ, a pending pod comes before a pod whose phase
//     is unknown, and a pod whose phase is unknown comes before a running pod.
//  3. If exactly one of the pods is ready, the pod that is not ready comes
//     before the ready pod.
//  4. If controller.kubernetes.io/pod-deletion-cost annotation is set, then
//     the pod with the lower value will come first.
//  5. If the pods' ranks differ, the pod with greater rank comes before the pod
//     with lower rank.
//  6. If both pods are ready but have not been ready for the same amount of
//     time, the pod that has been ready for a shorter amount of time comes
//     before the pod that has been ready for longer.
//  7. If one pod has a container that has restarted more than any container in
//     the other pod, the pod with the container with more restarts comes
//     before the other pod.
//  8. If the pods' creation times differ, the pod that was created more recently
//     comes before the older pod.
//
// In 6 and 8, times are compared in a logarithmic scale. This allows a level
// of randomness among equivalent Pods when sorting. If two pods have the same
// logarithmic rank, they are sorted by UUID to provide a pseudorandom order.
//
// If none of these rules matches, the second pod comes before the first pod.
//
// The intention of this ordering is to put pods that should be preferred for
// deletion first in the list.
type ActivePodsWithRanks struct {
	// Pods is a list of pods.
	Pods []*v1.Pod

	// Rank is a ranking of pods. This ranking is used during sorting when
	// comparing two pods that are both scheduled, in the same phase, and
	// having the same ready status.
	Rank []int

	// Now is a reference timestamp for doing logarithmic timestamp comparisons.
	// If zero, comparison happens without scaling.
	Now metav1.Time
}

func (s ActivePodsWithRanks) Len() int {
	return len(s.Pods)
}

func (s ActivePodsWithRanks) Swap(i, j int) {
	s.Pods[i], s.Pods[j] = s.Pods[j], s.Pods[i]
	s.Rank[i], s.Rank[j] = s.Rank[j], s.Rank[i]
}

// Less compares two pods with corresponding ranks and returns true if the first
// one should be preferred for deletion.
func (s ActivePodsWithRanks) Less(i, j int) bool {
	// 1. Unassigned < assigned
	// If only one of the pods is unassigned, the unassigned one is smaller
	if s.Pods[i].Spec.NodeName != s.Pods[j].Spec.NodeName && (len(s.Pods[i].Spec.NodeName) == 0 || len(s.Pods[j].Spec.NodeName) == 0) {
		return len(s.Pods[i].Spec.NodeName) == 0
	}
	// 2. PodPending < PodUnknown < PodRunning
	if podPhaseToOrdinal[s.Pods[i].Status.Phase] != podPhaseToOrdinal[s.Pods[j].Status.Phase] {
		return podPhaseToOrdinal[s.Pods[i].Status.Phase] < podPhaseToOrdinal[s.Pods[j].Status.Phase]
	}
	// 3. Not ready < ready
	// If only one of the pods is not ready, the not ready one is smaller
	if podutil.IsPodReady(s.Pods[i]) != podutil.IsPodReady(s.Pods[j]) {
		return !podutil.IsPodReady(s.Pods[i])
	}

	// 4. lower pod-deletion-cost < higher pod-deletion cost
	if utilfeature.DefaultFeatureGate.Enabled(features.PodDeletionCost) {
		pi, _ := helper.GetDeletionCostFromPodAnnotations(s.Pods[i].Annotations)
		pj, _ := helper.GetDeletionCostFromPodAnnotations(s.Pods[j].Annotations)
		if pi != pj {
			return pi < pj
		}
	}

	// 5. Doubled up < not doubled up
	// If one of the two pods is on the same node as one or more additional
	// ready pods that belong to the same replicaset, whichever pod has more
	// colocated ready pods is less
	if s.Rank[i] != s.Rank[j] {
		return s.Rank[i] > s.Rank[j]
	}
	// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
	//       see https://github.com/kubernetes/kubernetes/issues/22065
	// 6. Been ready for empty time < less time < more time
	// If both pods are ready, the latest ready one is smaller
	if podutil.IsPodReady(s.Pods[i]) && podutil.IsPodReady(s.Pods[j]) {
		readyTime1 := podReadyTime(s.Pods[i])
		readyTime2 := podReadyTime(s.Pods[j])
		if !readyTime1.Equal(readyTime2) {
			if !utilfeature.DefaultFeatureGate.Enabled(features.LogarithmicScaleDown) {
				return afterOrZero(readyTime1, readyTime2)
			} else {
				if s.Now.IsZero() || readyTime1.IsZero() || readyTime2.IsZero() {
					return afterOrZero(readyTime1, readyTime2)
				}
				rankDiff := logarithmicRankDiff(*readyTime1, *readyTime2, s.Now)
				if rankDiff == 0 {
					return s.Pods[i].UID < s.Pods[j].UID
				}
				return rankDiff < 0
			}
		}
	}
	// 7. Pods with containers with higher restart counts < lower restart counts
	if maxContainerRestarts(s.Pods[i]) != maxContainerRestarts(s.Pods[j]) {
		return maxContainerRestarts(s.Pods[i]) > maxContainerRestarts(s.Pods[j])
	}
	// 8. Empty creation time pods < newer pods < older pods
	if !s.Pods[i].CreationTimestamp.Equal(&s.Pods[j].CreationTimestamp) {
		if !utilfeature.DefaultFeatureGate.Enabled(features.LogarithmicScaleDown) {
			return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
		} else {
			if s.Now.IsZero() || s.Pods[i].CreationTimestamp.IsZero() || s.Pods[j].CreationTimestamp.IsZero() {
				return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
			}
			rankDiff := logarithmicRankDiff(s.Pods[i].CreationTimestamp, s.Pods[j].CreationTimestamp, s.Now)
			if rankDiff == 0 {
				return s.Pods[i].UID < s.Pods[j].UID
			}
			return rankDiff < 0
		}
	}
	return false
}
// afterOrZero checks if time t1 is after time t2; if one of them
// is zero, the zero time is seen as after non-zero time.
func afterOrZero(t1, t2 *metav1.Time) bool {
	if t1.Time.IsZero() || t2.Time.IsZero() {
		return t1.Time.IsZero()
	}
	return t1.After(t2.Time)
}

// logarithmicRankDiff calculates the base-2 logarithmic ranks of 2 timestamps,
// compared to the current timestamp
func logarithmicRankDiff(t1, t2, now metav1.Time) int64 {
	d1 := now.Sub(t1.Time)
	d2 := now.Sub(t2.Time)
	r1 := int64(-1)
	r2 := int64(-1)
	if d1 > 0 {
		r1 = int64(math.Log2(float64(d1)))
	}
	if d2 > 0 {
		r2 = int64(math.Log2(float64(d2)))
	}
	return r1 - r2
}
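// Illustrative sketch (not part of the upstream file): what the logarithmic
// comparison buys. Durations are in nanoseconds, so pods created 100s and 110s
// ago share the floor(log2) bucket [~68.7s, ~137.4s) and tie (rank diff 0,
// broken pseudorandomly by UID), while a pod created 1000s ago lands in a
// strictly older bucket.
func exampleLogarithmicRank(now metav1.Time) {
	t1 := metav1.NewTime(now.Add(-100 * time.Second))
	t2 := metav1.NewTime(now.Add(-110 * time.Second))
	t3 := metav1.NewTime(now.Add(-1000 * time.Second))
	_ = logarithmicRankDiff(t1, t2, now) // 0: same bucket, callers fall back to UID order
	_ = logarithmicRankDiff(t1, t3, now) // negative: t1 is younger, so it ranks lower
}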
func podReadyTime(pod *v1.Pod) *metav1.Time {
	if podutil.IsPodReady(pod) {
		for _, c := range pod.Status.Conditions {
			// we only care about pod ready conditions
			if c.Type == v1.PodReady && c.Status == v1.ConditionTrue {
				return &c.LastTransitionTime
			}
		}
	}
	return &metav1.Time{}
}

func maxContainerRestarts(pod *v1.Pod) int {
	maxRestarts := 0
	for _, c := range pod.Status.ContainerStatuses {
		maxRestarts = integer.IntMax(maxRestarts, int(c.RestartCount))
	}
	return maxRestarts
}

// FilterActivePods returns pods that have not terminated.
func FilterActivePods(logger klog.Logger, pods []*v1.Pod) []*v1.Pod {
	var result []*v1.Pod
	for _, p := range pods {
		if IsPodActive(p) {
			result = append(result, p)
		} else {
			logger.V(4).Info("Ignoring inactive pod", "pod", klog.KObj(p), "phase", p.Status.Phase, "deletionTime", p.DeletionTimestamp)
		}
	}
	return result
}

func FilterTerminatingPods(pods []*v1.Pod) []*v1.Pod {
	var result []*v1.Pod
	for _, p := range pods {
		if IsPodTerminating(p) {
			result = append(result, p)
		}
	}
	return result
}

func CountTerminatingPods(pods []*v1.Pod) int32 {
	numberOfTerminatingPods := 0
	for _, p := range pods {
		if IsPodTerminating(p) {
			numberOfTerminatingPods += 1
		}
	}
	return int32(numberOfTerminatingPods)
}

func IsPodActive(p *v1.Pod) bool {
	return v1.PodSucceeded != p.Status.Phase &&
		v1.PodFailed != p.Status.Phase &&
		p.DeletionTimestamp == nil
}

func IsPodTerminating(p *v1.Pod) bool {
	return !podutil.IsPodTerminal(p) &&
		p.DeletionTimestamp != nil
}

// FilterActiveReplicaSets returns replica sets that have (or at least ought to have) pods.
func FilterActiveReplicaSets(replicaSets []*apps.ReplicaSet) []*apps.ReplicaSet {
	activeFilter := func(rs *apps.ReplicaSet) bool {
		return rs != nil && *(rs.Spec.Replicas) > 0
	}
	return FilterReplicaSets(replicaSets, activeFilter)
}

type filterRS func(rs *apps.ReplicaSet) bool

// FilterReplicaSets returns replica sets that are filtered by filterFn (all returned ones should match filterFn).
func FilterReplicaSets(RSes []*apps.ReplicaSet, filterFn filterRS) []*apps.ReplicaSet {
	var filtered []*apps.ReplicaSet
	for i := range RSes {
		if filterFn(RSes[i]) {
			filtered = append(filtered, RSes[i])
		}
	}
	return filtered
}

// PodKey returns a key unique to the given pod within a cluster.
// It's used so we consistently use the same key scheme in this module.
// It does exactly what cache.MetaNamespaceKeyFunc would have done
// except there's no possibility for error since we know the exact type.
func PodKey(pod *v1.Pod) string {
	return fmt.Sprintf("%v/%v", pod.Namespace, pod.Name)
}

// ControllersByCreationTimestamp sorts a list of ReplicationControllers by creation timestamp, using their names as a tie breaker.
type ControllersByCreationTimestamp []*v1.ReplicationController

func (o ControllersByCreationTimestamp) Len() int      { return len(o) }
func (o ControllersByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ControllersByCreationTimestamp) Less(i, j int) bool {
	if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) {
		return o[i].Name < o[j].Name
	}
	return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
}

// ReplicaSetsByCreationTimestamp sorts a list of ReplicaSet by creation timestamp, using their names as a tie breaker.
type ReplicaSetsByCreationTimestamp []*apps.ReplicaSet

func (o ReplicaSetsByCreationTimestamp) Len() int      { return len(o) }
func (o ReplicaSetsByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ReplicaSetsByCreationTimestamp) Less(i, j int) bool {
	if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) {
		return o[i].Name < o[j].Name
	}
	return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
}

// ReplicaSetsBySizeOlder sorts a list of ReplicaSet by size in descending order, using their creation timestamp or name as a tie breaker.
// By using the creation timestamp, this sorts from old to new replica sets.
type ReplicaSetsBySizeOlder []*apps.ReplicaSet

func (o ReplicaSetsBySizeOlder) Len() int      { return len(o) }
func (o ReplicaSetsBySizeOlder) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ReplicaSetsBySizeOlder) Less(i, j int) bool {
	if *(o[i].Spec.Replicas) == *(o[j].Spec.Replicas) {
		return ReplicaSetsByCreationTimestamp(o).Less(i, j)
	}
	return *(o[i].Spec.Replicas) > *(o[j].Spec.Replicas)
}

// ReplicaSetsBySizeNewer sorts a list of ReplicaSet by size in descending order, using their creation timestamp or name as a tie breaker.
// By using the creation timestamp, this sorts from new to old replica sets.
type ReplicaSetsBySizeNewer []*apps.ReplicaSet

func (o ReplicaSetsBySizeNewer) Len() int      { return len(o) }
func (o ReplicaSetsBySizeNewer) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ReplicaSetsBySizeNewer) Less(i, j int) bool {
	if *(o[i].Spec.Replicas) == *(o[j].Spec.Replicas) {
		return ReplicaSetsByCreationTimestamp(o).Less(j, i)
	}
	return *(o[i].Spec.Replicas) > *(o[j].Spec.Replicas)
}

// AddOrUpdateTaintOnNode adds taints to the node. If a taint was added to the node, it issues
// an API call to update the node; otherwise, no API call is made. Returns an error if any occurs.
func AddOrUpdateTaintOnNode(ctx context.Context, c clientset.Interface, nodeName string, taints ...*v1.Taint) error {
	if len(taints) == 0 {
		return nil
	}
	firstTry := true
	return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error {
		var err error
		var oldNode *v1.Node
		// First we try getting node from the API server cache, as it's cheaper. If it fails
		// we get it from etcd to be sure to have fresh data.
		option := metav1.GetOptions{}
		if firstTry {
			option.ResourceVersion = "0"
			firstTry = false
		}
		oldNode, err = c.CoreV1().Nodes().Get(ctx, nodeName, option)
		if err != nil {
			return err
		}

		var newNode *v1.Node
		oldNodeCopy := oldNode
		updated := false
		for _, taint := range taints {
			curNewNode, ok, err := taintutils.AddOrUpdateTaint(oldNodeCopy, taint)
			if err != nil {
				return fmt.Errorf("failed to update taint of node")
			}
			updated = updated || ok
			newNode = curNewNode
			oldNodeCopy = curNewNode
		}
		if !updated {
			return nil
		}
		return PatchNodeTaints(ctx, c, nodeName, oldNode, newNode)
	})
}

// RemoveTaintOffNode is for cleaning up taints temporarily added to a node;
// it won't fail if the target taint doesn't exist or has already been removed.
// If passed a node it checks whether there's anything to be done; if the taint is
// not present it doesn't issue any API calls.
func RemoveTaintOffNode(ctx context.Context, c clientset.Interface, nodeName string, node *v1.Node, taints ...*v1.Taint) error {
	if len(taints) == 0 {
		return nil
	}
	// Short circuit for limiting amount of API calls.
	if node != nil {
		match := false
		for _, taint := range taints {
			if taintutils.TaintExists(node.Spec.Taints, taint) {
				match = true
				break
			}
		}
		if !match {
			return nil
		}
	}

	firstTry := true
	return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error {
		var err error
		var oldNode *v1.Node
		// First we try getting node from the API server cache, as it's cheaper. If it fails
		// we get it from etcd to be sure to have fresh data.
		option := metav1.GetOptions{}
		if firstTry {
			option.ResourceVersion = "0"
			firstTry = false
		}
		oldNode, err = c.CoreV1().Nodes().Get(ctx, nodeName, option)
		if err != nil {
			return err
		}

		var newNode *v1.Node
		oldNodeCopy := oldNode
		updated := false
		for _, taint := range taints {
			curNewNode, ok, err := taintutils.RemoveTaint(oldNodeCopy, taint)
			if err != nil {
				return fmt.Errorf("failed to remove taint of node")
			}
			updated = updated || ok
			newNode = curNewNode
			oldNodeCopy = curNewNode
		}
		if !updated {
			return nil
		}
		return PatchNodeTaints(ctx, c, nodeName, oldNode, newNode)
	})
}
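// Illustrative sketch (not part of the upstream file): adding and later
// removing a temporary NoSchedule taint, in the style of the node lifecycle
// controller. The taint key is hypothetical.
func exampleTaintUsage(ctx context.Context, c clientset.Interface, nodeName string) error {
	taint := &v1.Taint{Key: "example.io/maintenance", Effect: v1.TaintEffectNoSchedule}
	if err := AddOrUpdateTaintOnNode(ctx, c, nodeName, taint); err != nil {
		return err
	}
	// ... perform the work that required the taint ...
	// Passing a nil node makes RemoveTaintOffNode fetch the node itself
	// instead of short-circuiting on a cached copy.
	return RemoveTaintOffNode(ctx, c, nodeName, nil, taint)
}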
// PatchNodeTaints patches node's taints.
func PatchNodeTaints(ctx context.Context, c clientset.Interface, nodeName string, oldNode *v1.Node, newNode *v1.Node) error {
	// Strip base diff node from RV to ensure that our Patch request will set RV to check for conflicts over .spec.taints.
	// This is needed because .spec.taints does not specify patchMergeKey and patchStrategy and adding them is no longer an option for compatibility reasons.
	// Using other Patch strategy works for adding new taints, however will not resolve problem with taint removal.
	oldNodeNoRV := oldNode.DeepCopy()
	oldNodeNoRV.ResourceVersion = ""
	oldDataNoRV, err := json.Marshal(&oldNodeNoRV)
	if err != nil {
		return fmt.Errorf("failed to marshal old node %#v for node %q: %v", oldNodeNoRV, nodeName, err)
	}

	newTaints := newNode.Spec.Taints
	newNodeClone := oldNode.DeepCopy()
	newNodeClone.Spec.Taints = newTaints
	newData, err := json.Marshal(newNodeClone)
	if err != nil {
		return fmt.Errorf("failed to marshal new node %#v for node %q: %v", newNodeClone, nodeName, err)
	}

	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldDataNoRV, newData, v1.Node{})
	if err != nil {
		return fmt.Errorf("failed to create patch for node %q: %v", nodeName, err)
	}

	_, err = c.CoreV1().Nodes().Patch(ctx, nodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{})
	return err
}

// ComputeHash returns a hash value calculated from pod template and
// a collisionCount to avoid hash collision. The hash will be safe-encoded to
// avoid bad words.
func ComputeHash(template *v1.PodTemplateSpec, collisionCount *int32) string {
	podTemplateSpecHasher := fnv.New32a()
	hashutil.DeepHashObject(podTemplateSpecHasher, *template)

	// Add collisionCount in the hash if it exists.
	if collisionCount != nil {
		collisionCountBytes := make([]byte, 8)
		binary.LittleEndian.PutUint32(collisionCountBytes, uint32(*collisionCount))
		podTemplateSpecHasher.Write(collisionCountBytes)
	}

	return rand.SafeEncodeString(fmt.Sprint(podTemplateSpecHasher.Sum32()))
}
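// Illustrative sketch (not part of the upstream file): deriving the
// pod-template-hash suffix the way a Deployment-style controller names its
// ReplicaSets, bumping Status.CollisionCount when a hash collision occurs.
func exampleComputeHashUsage(d *apps.Deployment) string {
	hash := ComputeHash(&d.Spec.Template, d.Status.CollisionCount)
	return d.Name + "-" + hash // candidate ReplicaSet name for this template
}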
func AddOrUpdateLabelsOnNode(kubeClient clientset.Interface, nodeName string, labelsToUpdate map[string]string) error {
	firstTry := true
	return clientretry.RetryOnConflict(UpdateLabelBackoff, func() error {
		var err error
		var node *v1.Node
		// First we try getting node from the API server cache, as it's cheaper. If it fails
		// we get it from etcd to be sure to have fresh data.
		option := metav1.GetOptions{}
		if firstTry {
			option.ResourceVersion = "0"
			firstTry = false
		}
		node, err = kubeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, option)
		if err != nil {
			return err
		}

		// Make a copy of the node and update the labels.
		newNode := node.DeepCopy()
		if newNode.Labels == nil {
			newNode.Labels = make(map[string]string)
		}
		for key, value := range labelsToUpdate {
			newNode.Labels[key] = value
		}

		oldData, err := json.Marshal(node)
		if err != nil {
			return fmt.Errorf("failed to marshal the existing node %#v: %v", node, err)
		}
		newData, err := json.Marshal(newNode)
		if err != nil {
			return fmt.Errorf("failed to marshal the new node %#v: %v", newNode, err)
		}
		patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Node{})
		if err != nil {
			return fmt.Errorf("failed to create a two-way merge patch: %v", err)
		}
		if _, err := kubeClient.CoreV1().Nodes().Patch(context.TODO(), node.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}); err != nil {
			return fmt.Errorf("failed to patch the node: %v", err)
		}
		return nil
	})
}
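// Illustrative sketch (not part of the upstream file): labeling a node with
// conflict retry via AddOrUpdateLabelsOnNode. The label key and value are
// hypothetical.
func exampleLabelNode(kubeClient clientset.Interface, nodeName string) error {
	return AddOrUpdateLabelsOnNode(kubeClient, nodeName, map[string]string{
		"example.io/pool": "burst",
	})
}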