k8s.io/kubernetes@v1.29.3/pkg/controller/controller_utils.go

     1  /*
     2  Copyright 2014 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package controller
    18  
    19  import (
    20  	"context"
    21  	"encoding/binary"
    22  	"encoding/json"
    23  	"fmt"
    24  	"hash/fnv"
    25  	"math"
    26  	"sync"
    27  	"sync/atomic"
    28  	"time"
    29  
    30  	apps "k8s.io/api/apps/v1"
    31  	v1 "k8s.io/api/core/v1"
    32  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    33  	"k8s.io/apimachinery/pkg/api/meta"
    34  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    35  	"k8s.io/apimachinery/pkg/labels"
    36  	"k8s.io/apimachinery/pkg/runtime"
    37  	"k8s.io/apimachinery/pkg/types"
    38  	"k8s.io/apimachinery/pkg/util/rand"
    39  	"k8s.io/apimachinery/pkg/util/sets"
    40  	"k8s.io/apimachinery/pkg/util/strategicpatch"
    41  	"k8s.io/apimachinery/pkg/util/wait"
    42  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    43  	clientset "k8s.io/client-go/kubernetes"
    44  	"k8s.io/client-go/tools/cache"
    45  	"k8s.io/client-go/tools/record"
    46  	clientretry "k8s.io/client-go/util/retry"
    47  	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
    48  	"k8s.io/kubernetes/pkg/apis/core/helper"
    49  	_ "k8s.io/kubernetes/pkg/apis/core/install"
    50  	"k8s.io/kubernetes/pkg/apis/core/validation"
    51  	"k8s.io/kubernetes/pkg/features"
    52  	hashutil "k8s.io/kubernetes/pkg/util/hash"
    53  	taintutils "k8s.io/kubernetes/pkg/util/taints"
    54  	"k8s.io/utils/clock"
    55  	"k8s.io/utils/integer"
    56  
    57  	"k8s.io/klog/v2"
    58  )
    59  
    60  const (
     61  	// If a watch drops a delete event for a pod, it'll take this long
     62  	// before a dormant controller waiting for those events is woken up anyway. It is
     63  	// specifically targeted at the case where some problem prevents an update
     64  	// of expectations; without it the controller could stay asleep forever. This should
     65  	// be set based on the expected latency of watch events.
    66  	//
    67  	// Currently a controller can service (create *and* observe the watch events for said
    68  	// creation) about 10 pods a second, so it takes about 1 min to service
     69  	// 500 pods. Creation alone is limited to 20 QPS, and watch events arrive with ~10-30s
     70  	// latency per pod at the scale of 3000 pods over 100 nodes.
    71  	ExpectationsTimeout = 5 * time.Minute
    72  	// When batching pod creates, SlowStartInitialBatchSize is the size of the
    73  	// initial batch.  The size of each successive batch is twice the size of
    74  	// the previous batch.  For example, for a value of 1, batch sizes would be
    75  	// 1, 2, 4, 8, ...  and for a value of 10, batch sizes would be
    76  	// 10, 20, 40, 80, ...  Setting the value higher means that quota denials
    77  	// will result in more doomed API calls and associated event spam.  Setting
    78  	// the value lower will result in more API call round trip periods for
    79  	// large batches.
    80  	//
    81  	// Given a number of pods to start "N":
    82  	// The number of doomed calls per sync once quota is exceeded is given by:
    83  	//      min(N,SlowStartInitialBatchSize)
    84  	// The number of batches is given by:
    85  	//      1+floor(log_2(ceil(N/SlowStartInitialBatchSize)))
    86  	SlowStartInitialBatchSize = 1
    87  )
    88  
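        // The doubling batch sizes described above are implemented by helpers such as
        // slowStartBatch in the replicaset controller package. A minimal sequential
        // sketch of the pattern (editor's illustration, not part of this file; the
        // real helper runs each batch concurrently, and "fn" stands for one create call):
        //
        //	func slowStart(count int, fn func() error) (successes int, err error) {
        //		remaining := count
        //		for batch := min(remaining, SlowStartInitialBatchSize); batch > 0; batch = min(2*batch, remaining) {
        //			for i := 0; i < batch; i++ {
        //				if err := fn(); err != nil {
        //					return successes, err // stop doubling on the first failure
        //				}
        //				successes++
        //			}
        //			remaining -= batch
        //		}
        //		return successes, nil
        //	}
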
    89  var UpdateTaintBackoff = wait.Backoff{
    90  	Steps:    5,
    91  	Duration: 100 * time.Millisecond,
    92  	Jitter:   1.0,
    93  }
    94  
    95  var UpdateLabelBackoff = wait.Backoff{
    96  	Steps:    5,
    97  	Duration: 100 * time.Millisecond,
    98  	Jitter:   1.0,
    99  }
   100  
   101  var (
   102  	KeyFunc           = cache.DeletionHandlingMetaNamespaceKeyFunc
   103  	podPhaseToOrdinal = map[v1.PodPhase]int{v1.PodPending: 0, v1.PodUnknown: 1, v1.PodRunning: 2}
   104  )
   105  
        // ResyncPeriodFunc returns the resync period to use for a controller.
   106  type ResyncPeriodFunc func() time.Duration
   107  
   108  // NoResyncPeriodFunc returns 0 for resyncPeriod, indicating that resyncing is not needed.
   109  func NoResyncPeriodFunc() time.Duration {
   110  	return 0
   111  }
   112  
   113  // StaticResyncPeriodFunc returns a ResyncPeriodFunc that always returns the specified resync period.
   114  func StaticResyncPeriodFunc(resyncPeriod time.Duration) ResyncPeriodFunc {
   115  	return func() time.Duration {
   116  		return resyncPeriod
   117  	}
   118  }
   119  
   120  // Expectations are a way for controllers to tell the controller manager what they expect. e.g.:
   121  //	ControllerExpectations: {
   122  //		controller1: expects  2 adds in 2 minutes
   123  //		controller2: expects  2 dels in 2 minutes
   124  //		controller3: expects -1 adds in 2 minutes => controller3's expectations have already been met
   125  //	}
   126  //
   127  // Implementation:
   128  //	ControlleeExpectations = a pair of atomic counters tracking a controllee's creations/deletions
   129  //	ControllerExpectations = a cache.Store of ControlleeExpectations with a manual TTL check (see isExpired)
   130  //
   131  // * Once set, expectations can only be lowered
   132  // * A controller isn't synced until its expectations are either fulfilled or expired
   133  // * Controllers that don't set expectations will get woken up for every matching controllee
   134  
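        // A typical lifecycle, as an editor's sketch ("logger", the controller key,
        // and the pod-creation loop are assumed placeholders):
        //
        //	exp := NewControllerExpectations()
        //	key := "default/my-controller"
        //
        //	// Sync loop: only act once prior expectations are satisfied or expired.
        //	if exp.SatisfiedExpectations(logger, key) {
        //		_ = exp.ExpectCreations(logger, key, 3)
        //		// ... issue 3 pod create calls; for each call that fails,
        //		// call exp.CreationObserved(logger, key) so the counts converge.
        //	}
        //
        //	// Informer add-event handler, on observing a created pod:
        //	exp.CreationObserved(logger, key)
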
   135  // ExpKeyFunc parses out the key from a ControlleeExpectations.
   136  var ExpKeyFunc = func(obj interface{}) (string, error) {
   137  	if e, ok := obj.(*ControlleeExpectations); ok {
   138  		return e.key, nil
   139  	}
   140  	return "", fmt.Errorf("could not find key for obj %#v", obj)
   141  }
   142  
   143  // ControllerExpectationsInterface is an interface that allows users to set and wait on expectations.
   144  // Only abstracted out for testing.
   145  // Warning: if using KeyFunc it is not safe to use a single ControllerExpectationsInterface with different
   146  // types of controllers, because the keys might conflict across types.
   147  type ControllerExpectationsInterface interface {
   148  	GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error)
   149  	SatisfiedExpectations(logger klog.Logger, controllerKey string) bool
   150  	DeleteExpectations(logger klog.Logger, controllerKey string)
   151  	SetExpectations(logger klog.Logger, controllerKey string, add, del int) error
   152  	ExpectCreations(logger klog.Logger, controllerKey string, adds int) error
   153  	ExpectDeletions(logger klog.Logger, controllerKey string, dels int) error
   154  	CreationObserved(logger klog.Logger, controllerKey string)
   155  	DeletionObserved(logger klog.Logger, controllerKey string)
   156  	RaiseExpectations(logger klog.Logger, controllerKey string, add, del int)
   157  	LowerExpectations(logger klog.Logger, controllerKey string, add, del int)
   158  }
   159  
   160  // ControllerExpectations is a cache mapping controllers to what they expect to see before being woken up for a sync.
   161  type ControllerExpectations struct {
   162  	cache.Store
   163  }
   164  
   165  // GetExpectations returns the ControlleeExpectations of the given controller.
   166  func (r *ControllerExpectations) GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error) {
   167  	exp, exists, err := r.GetByKey(controllerKey)
   168  	if err == nil && exists {
   169  		return exp.(*ControlleeExpectations), true, nil
   170  	}
   171  	return nil, false, err
   172  }
   173  
   174  // DeleteExpectations deletes the expectations of the given controller from the TTLStore.
   175  func (r *ControllerExpectations) DeleteExpectations(logger klog.Logger, controllerKey string) {
   176  	if exp, exists, err := r.GetByKey(controllerKey); err == nil && exists {
   177  		if err := r.Delete(exp); err != nil {
   179  			logger.V(2).Info("Error deleting expectations", "controller", controllerKey, "err", err)
   180  		}
   181  	}
   182  }
   183  
   184  // SatisfiedExpectations returns true if the required adds/dels for the given controller have been observed.
   185  // Add/del counts are established by the controller at sync time, and updated as controllees are observed by the controller
   186  // manager.
   187  func (r *ControllerExpectations) SatisfiedExpectations(logger klog.Logger, controllerKey string) bool {
   188  	if exp, exists, err := r.GetExpectations(controllerKey); exists {
   189  		if exp.Fulfilled() {
   190  			logger.V(4).Info("Controller expectations fulfilled", "expectations", exp)
   191  			return true
   192  		} else if exp.isExpired() {
   193  			logger.V(4).Info("Controller expectations expired", "expectations", exp)
   194  			return true
   195  		} else {
   196  			logger.V(4).Info("Controller still waiting on expectations", "expectations", exp)
   197  			return false
   198  		}
   199  	} else if err != nil {
   200  		logger.V(2).Info("Error encountered while checking expectations, forcing sync", "err", err)
   201  	} else {
   202  		// When a new controller is created, it doesn't have expectations.
   203  		// When it doesn't see expected watch events for > TTL, the expectations expire.
   204  		//	- In this case it wakes up, creates/deletes controllees, and sets expectations again.
   205  	// When it has satisfied expectations and no controllees need to be created/destroyed for > TTL, the expectations expire.
   206  	//	- In this case it continues without setting expectations until it needs to create/delete controllees.
   207  		logger.V(4).Info("Controller either never recorded expectations, or the ttl expired", "controller", controllerKey)
   208  	}
   209  	// Trigger a sync if we either encountered an error (which shouldn't happen since we're
   210  	// getting from local store) or this controller hasn't established expectations.
   211  	return true
   212  }
   213  
   214  // TODO: Extend ExpirationCache to support explicit expiration.
   215  // TODO: Make this possible to disable in tests.
   216  // TODO: Support injection of clock.
   217  func (exp *ControlleeExpectations) isExpired() bool {
   218  	return clock.RealClock{}.Since(exp.timestamp) > ExpectationsTimeout
   219  }
   220  
   221  // SetExpectations registers new expectations for the given controller. Forgets existing expectations.
   222  func (r *ControllerExpectations) SetExpectations(logger klog.Logger, controllerKey string, add, del int) error {
   223  	exp := &ControlleeExpectations{add: int64(add), del: int64(del), key: controllerKey, timestamp: clock.RealClock{}.Now()}
   224  	logger.V(4).Info("Setting expectations", "expectations", exp)
   225  	return r.Add(exp)
   226  }
   227  
   228  func (r *ControllerExpectations) ExpectCreations(logger klog.Logger, controllerKey string, adds int) error {
   229  	return r.SetExpectations(logger, controllerKey, adds, 0)
   230  }
   231  
   232  func (r *ControllerExpectations) ExpectDeletions(logger klog.Logger, controllerKey string, dels int) error {
   233  	return r.SetExpectations(logger, controllerKey, 0, dels)
   234  }
   235  
   236  // LowerExpectations decrements the expectation counts of the given controller.
   237  func (r *ControllerExpectations) LowerExpectations(logger klog.Logger, controllerKey string, add, del int) {
   238  	if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
   239  		exp.Add(int64(-add), int64(-del))
   240  		// The expectations might've been modified since the update on the previous line.
   241  		logger.V(4).Info("Lowered expectations", "expectations", exp)
   242  	}
   243  }
   244  
   245  // RaiseExpectations increments the expectation counts of the given controller.
   246  func (r *ControllerExpectations) RaiseExpectations(logger klog.Logger, controllerKey string, add, del int) {
   247  	if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
   248  		exp.Add(int64(add), int64(del))
   249  		// The expectations might've been modified since the update on the previous line.
   250  		logger.V(4).Info("Raised expectations", "expectations", exp)
   251  	}
   252  }
   253  
   254  // CreationObserved atomically decrements the `add` expectation count of the given controller.
   255  func (r *ControllerExpectations) CreationObserved(logger klog.Logger, controllerKey string) {
   256  	r.LowerExpectations(logger, controllerKey, 1, 0)
   257  }
   258  
   259  // DeletionObserved atomically decrements the `del` expectation count of the given controller.
   260  func (r *ControllerExpectations) DeletionObserved(logger klog.Logger, controllerKey string) {
   261  	r.LowerExpectations(logger, controllerKey, 0, 1)
   262  }
   263  
   264  // ControlleeExpectations track controllee creates/deletes.
   265  type ControlleeExpectations struct {
   266  	// Important: Since these two int64 fields are accessed with sync/atomic, they have to be at the top of the struct to guarantee the 64-bit alignment required on 32-bit platforms
   267  	// See: https://golang.org/pkg/sync/atomic/ for more information
   268  	add       int64
   269  	del       int64
   270  	key       string
   271  	timestamp time.Time
   272  }
   273  
   274  // Add adds the given deltas (which may be negative) to the add and del counters.
   275  func (e *ControlleeExpectations) Add(add, del int64) {
   276  	atomic.AddInt64(&e.add, add)
   277  	atomic.AddInt64(&e.del, del)
   278  }
   279  
   280  // Fulfilled returns true if this expectation has been fulfilled.
   281  func (e *ControlleeExpectations) Fulfilled() bool {
   282  	// TODO: think about why this line being atomic doesn't matter
   283  	return atomic.LoadInt64(&e.add) <= 0 && atomic.LoadInt64(&e.del) <= 0
   284  }
   285  
   286  // GetExpectations returns the add and del expectations of the controllee.
   287  func (e *ControlleeExpectations) GetExpectations() (int64, int64) {
   288  	return atomic.LoadInt64(&e.add), atomic.LoadInt64(&e.del)
   289  }
   290  
   291  // MarshalLog makes a thread-safe copy of the values of the expectations that
   292  // can be used for logging.
   293  func (e *ControlleeExpectations) MarshalLog() interface{} {
   294  	return struct {
   295  		add int64
   296  		del int64
   297  		key string
   298  	}{
   299  		add: atomic.LoadInt64(&e.add),
   300  		del: atomic.LoadInt64(&e.del),
   301  		key: e.key,
   302  	}
   303  }
   304  
   305  // NewControllerExpectations returns a store for ControllerExpectations.
   306  func NewControllerExpectations() *ControllerExpectations {
   307  	return &ControllerExpectations{cache.NewStore(ExpKeyFunc)}
   308  }
   309  
   310  // UIDSetKeyFunc parses out the key from a UIDSet.
   311  var UIDSetKeyFunc = func(obj interface{}) (string, error) {
   312  	if u, ok := obj.(*UIDSet); ok {
   313  		return u.key, nil
   314  	}
   315  	return "", fmt.Errorf("could not find key for obj %#v", obj)
   316  }
   317  
   318  // UIDSet holds a key and a set of UIDs. Used by the
   319  // UIDTrackingControllerExpectations to remember which UIDs it has seen and is
   320  // still waiting for.
   321  type UIDSet struct {
   322  	sets.String
   323  	key string
   324  }
   325  
   326  // UIDTrackingControllerExpectations tracks the UID of the pods it deletes.
   327  // This cache is needed over plain old expectations to safely handle graceful
   328  // deletion. The desired behavior is to treat an update that sets the
   329  // DeletionTimestamp on an object as a delete. To do so consistently, one needs
   330  // to remember the expected deletes so they aren't double counted.
   331  // TODO: Track creates as well (#22599)
   332  type UIDTrackingControllerExpectations struct {
   333  	ControllerExpectationsInterface
   334  	// TODO: There is a much nicer way to do this that involves a single store,
   335  	// a lock per entry, and a ControlleeExpectationsInterface type.
   336  	uidStoreLock sync.Mutex
   337  	// Store used for the UIDs associated with any expectation tracked via the
   338  	// ControllerExpectationsInterface.
   339  	uidStore cache.Store
   340  }
   341  
   342  // GetUIDs is a convenience method to avoid exposing the set of expected uids.
   343  // The returned set is not thread safe; all modifications must be made while
   344  // holding the uidStoreLock.
   345  func (u *UIDTrackingControllerExpectations) GetUIDs(controllerKey string) sets.String {
   346  	if uid, exists, err := u.uidStore.GetByKey(controllerKey); err == nil && exists {
   347  		return uid.(*UIDSet).String
   348  	}
   349  	return nil
   350  }
   351  
   352  // ExpectDeletions records expectations for the given deletedKeys against the given controller.
   353  func (u *UIDTrackingControllerExpectations) ExpectDeletions(logger klog.Logger, rcKey string, deletedKeys []string) error {
   354  	expectedUIDs := sets.NewString()
   355  	for _, k := range deletedKeys {
   356  		expectedUIDs.Insert(k)
   357  	}
   358  	logger.V(4).Info("Controller waiting on deletions", "controller", rcKey, "keys", deletedKeys)
   359  	u.uidStoreLock.Lock()
   360  	defer u.uidStoreLock.Unlock()
   361  
   362  	if existing := u.GetUIDs(rcKey); existing != nil && existing.Len() != 0 {
   363  		logger.Error(nil, "Clobbering existing delete keys", "keys", existing)
   364  	}
   365  	if err := u.uidStore.Add(&UIDSet{expectedUIDs, rcKey}); err != nil {
   366  		return err
   367  	}
   368  	return u.ControllerExpectationsInterface.ExpectDeletions(logger, rcKey, expectedUIDs.Len())
   369  }
   370  
   371  // DeletionObserved records the given deleteKey as a deletion, for the given rc.
   372  func (u *UIDTrackingControllerExpectations) DeletionObserved(logger klog.Logger, rcKey, deleteKey string) {
   373  	u.uidStoreLock.Lock()
   374  	defer u.uidStoreLock.Unlock()
   375  
   376  	uids := u.GetUIDs(rcKey)
   377  	if uids != nil && uids.Has(deleteKey) {
   378  		logger.V(4).Info("Controller received delete for pod", "controller", rcKey, "key", deleteKey)
   379  		u.ControllerExpectationsInterface.DeletionObserved(logger, rcKey)
   380  		uids.Delete(deleteKey)
   381  	}
   382  }
   383  
   384  // DeleteExpectations deletes the UID set and invokes DeleteExpectations on the
   385  // underlying ControllerExpectationsInterface.
   386  func (u *UIDTrackingControllerExpectations) DeleteExpectations(logger klog.Logger, rcKey string) {
   387  	u.uidStoreLock.Lock()
   388  	defer u.uidStoreLock.Unlock()
   389  
   390  	u.ControllerExpectationsInterface.DeleteExpectations(logger, rcKey)
   391  	if uidExp, exists, err := u.uidStore.GetByKey(rcKey); err == nil && exists {
   392  		if err := u.uidStore.Delete(uidExp); err != nil {
   393  			logger.V(2).Info("Error deleting uid expectations", "controller", rcKey, "err", err)
   394  		}
   395  	}
   396  }
   397  
   398  // NewUIDTrackingControllerExpectations returns a wrapper around
   399  // ControllerExpectations that is aware of deleteKeys.
   400  func NewUIDTrackingControllerExpectations(ce ControllerExpectationsInterface) *UIDTrackingControllerExpectations {
   401  	return &UIDTrackingControllerExpectations{ControllerExpectationsInterface: ce, uidStore: cache.NewStore(UIDSetKeyFunc)}
   402  }
   403  
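        // Editor's sketch of the intended usage ("logger" and "rcKey" are assumed
        // placeholders; the keys are whatever identifiers the controller deletes by,
        // e.g. namespace/name):
        //
        //	uidExp := NewUIDTrackingControllerExpectations(NewControllerExpectations())
        //
        //	// Sync: about to delete two pods; record their keys first.
        //	_ = uidExp.ExpectDeletions(logger, rcKey, []string{"ns/pod-a", "ns/pod-b"})
        //
        //	// Delete handler (or update handler that sees a DeletionTimestamp set):
        //	uidExp.DeletionObserved(logger, rcKey, "ns/pod-a")
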
   404  // Reasons for pod events
   405  const (
   406  	// FailedCreatePodReason is added in an event and in a replica set condition
   407  	// when a pod for a replica set fails to be created.
   408  	FailedCreatePodReason = "FailedCreate"
   409  	// SuccessfulCreatePodReason is added in an event when a pod for a replica set
   410  	// is successfully created.
   411  	SuccessfulCreatePodReason = "SuccessfulCreate"
   412  	// FailedDeletePodReason is added in an event and in a replica set condition
   413  	// when a pod for a replica set fails to be deleted.
   414  	FailedDeletePodReason = "FailedDelete"
   415  	// SuccessfulDeletePodReason is added in an event when a pod for a replica set
   416  	// is successfully deleted.
   417  	SuccessfulDeletePodReason = "SuccessfulDelete"
   418  )
   419  
   420  // RSControlInterface is an interface that knows how to patch
   421  // ReplicaSets. It is used by the deployment controller to ease
   422  // testing of the actions that it takes.
   423  type RSControlInterface interface {
   424  	PatchReplicaSet(ctx context.Context, namespace, name string, data []byte) error
   425  }
   426  
   427  // RealRSControl is the default implementation of RSControlInterface.
   428  type RealRSControl struct {
   429  	KubeClient clientset.Interface
   430  	Recorder   record.EventRecorder
   431  }
   432  
   433  var _ RSControlInterface = &RealRSControl{}
   434  
   435  func (r RealRSControl) PatchReplicaSet(ctx context.Context, namespace, name string, data []byte) error {
   436  	_, err := r.KubeClient.AppsV1().ReplicaSets(namespace).Patch(ctx, name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
   437  	return err
   438  }
   439  
   440  // TODO: merge the controller revision interface in controller_history.go with this one
   441  // ControllerRevisionControlInterface is an interface that knows how to patch
   442  // ControllerRevisions. It is used by the daemonset controller to ease
   443  // testing of the actions that it takes.
   444  type ControllerRevisionControlInterface interface {
   445  	PatchControllerRevision(ctx context.Context, namespace, name string, data []byte) error
   446  }
   447  
   448  // RealControllerRevisionControl is the default implementation of ControllerRevisionControlInterface.
   449  type RealControllerRevisionControl struct {
   450  	KubeClient clientset.Interface
   451  }
   452  
   453  var _ ControllerRevisionControlInterface = &RealControllerRevisionControl{}
   454  
   455  func (r RealControllerRevisionControl) PatchControllerRevision(ctx context.Context, namespace, name string, data []byte) error {
   456  	_, err := r.KubeClient.AppsV1().ControllerRevisions(namespace).Patch(ctx, name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
   457  	return err
   458  }
   459  
   460  // PodControlInterface is an interface that knows how to add or delete pods.
   461  // It is abstracted as an interface to allow testing.
   462  type PodControlInterface interface {
   463  	// CreatePods creates new pods according to the spec, and sets object as the pod's controller.
   464  	CreatePods(ctx context.Context, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error
   465  	// CreatePodsWithGenerateName creates new pods according to the spec, sets object as the pod's controller and sets pod's generateName.
   466  	CreatePodsWithGenerateName(ctx context.Context, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference, generateName string) error
   467  	// DeletePod deletes the pod identified by podID.
   468  	DeletePod(ctx context.Context, namespace string, podID string, object runtime.Object) error
   469  	// PatchPod patches the pod.
   470  	PatchPod(ctx context.Context, namespace, name string, data []byte) error
   471  }
   472  
   473  // RealPodControl is the default implementation of PodControlInterface.
   474  type RealPodControl struct {
   475  	KubeClient clientset.Interface
   476  	Recorder   record.EventRecorder
   477  }
   478  
   479  var _ PodControlInterface = &RealPodControl{}
   480  
   481  func getPodsLabelSet(template *v1.PodTemplateSpec) labels.Set {
   482  	desiredLabels := make(labels.Set)
   483  	for k, v := range template.Labels {
   484  		desiredLabels[k] = v
   485  	}
   486  	return desiredLabels
   487  }
   488  
   489  func getPodsFinalizers(template *v1.PodTemplateSpec) []string {
   490  	desiredFinalizers := make([]string, len(template.Finalizers))
   491  	copy(desiredFinalizers, template.Finalizers)
   492  	return desiredFinalizers
   493  }
   494  
   495  func getPodsAnnotationSet(template *v1.PodTemplateSpec) labels.Set {
   496  	desiredAnnotations := make(labels.Set)
   497  	for k, v := range template.Annotations {
   498  		desiredAnnotations[k] = v
   499  	}
   500  	return desiredAnnotations
   501  }
   502  
   503  func getPodsPrefix(controllerName string) string {
   504  	// use the dash (if the name isn't too long) to make the pod name a bit prettier
   505  	prefix := fmt.Sprintf("%s-", controllerName)
   506  	if len(validation.ValidatePodName(prefix, true)) != 0 {
   507  		prefix = controllerName
   508  	}
   509  	return prefix
   510  }
   511  
   512  func validateControllerRef(controllerRef *metav1.OwnerReference) error {
   513  	if controllerRef == nil {
   514  		return fmt.Errorf("controllerRef is nil")
   515  	}
   516  	if len(controllerRef.APIVersion) == 0 {
   517  		return fmt.Errorf("controllerRef has empty APIVersion")
   518  	}
   519  	if len(controllerRef.Kind) == 0 {
   520  		return fmt.Errorf("controllerRef has empty Kind")
   521  	}
   522  	if controllerRef.Controller == nil || !*controllerRef.Controller {
   523  		return fmt.Errorf("controllerRef.Controller is not set to true")
   524  	}
   525  	if controllerRef.BlockOwnerDeletion == nil || !*controllerRef.BlockOwnerDeletion {
   526  		return fmt.Errorf("controllerRef.BlockOwnerDeletion is not set")
   527  	}
   528  	return nil
   529  }
   530  
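        // A reference that passes validateControllerRef is most easily built with
        // metav1.NewControllerRef, which sets both Controller and BlockOwnerDeletion
        // to true. Editor's sketch ("rs" is an assumed *apps.ReplicaSet and
        // "podControl" an assumed PodControlInterface):
        //
        //	controllerRef := metav1.NewControllerRef(rs, apps.SchemeGroupVersion.WithKind("ReplicaSet"))
        //	err := podControl.CreatePods(ctx, rs.Namespace, &rs.Spec.Template, rs, controllerRef)
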
   531  func (r RealPodControl) CreatePods(ctx context.Context, namespace string, template *v1.PodTemplateSpec, controllerObject runtime.Object, controllerRef *metav1.OwnerReference) error {
   532  	return r.CreatePodsWithGenerateName(ctx, namespace, template, controllerObject, controllerRef, "")
   533  }
   534  
   535  func (r RealPodControl) CreatePodsWithGenerateName(ctx context.Context, namespace string, template *v1.PodTemplateSpec, controllerObject runtime.Object, controllerRef *metav1.OwnerReference, generateName string) error {
   536  	if err := validateControllerRef(controllerRef); err != nil {
   537  		return err
   538  	}
   539  	pod, err := GetPodFromTemplate(template, controllerObject, controllerRef)
   540  	if err != nil {
   541  		return err
   542  	}
   543  	if len(generateName) > 0 {
   544  		pod.ObjectMeta.GenerateName = generateName
   545  	}
   546  	return r.createPods(ctx, namespace, pod, controllerObject)
   547  }
   548  
   549  func (r RealPodControl) PatchPod(ctx context.Context, namespace, name string, data []byte) error {
   550  	_, err := r.KubeClient.CoreV1().Pods(namespace).Patch(ctx, name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
   551  	return err
   552  }
   553  
   554  func GetPodFromTemplate(template *v1.PodTemplateSpec, parentObject runtime.Object, controllerRef *metav1.OwnerReference) (*v1.Pod, error) {
   555  	desiredLabels := getPodsLabelSet(template)
   556  	desiredFinalizers := getPodsFinalizers(template)
   557  	desiredAnnotations := getPodsAnnotationSet(template)
   558  	accessor, err := meta.Accessor(parentObject)
   559  	if err != nil {
   560  		return nil, fmt.Errorf("parentObject does not have ObjectMeta, %v", err)
   561  	}
   562  	prefix := getPodsPrefix(accessor.GetName())
   563  
   564  	pod := &v1.Pod{
   565  		ObjectMeta: metav1.ObjectMeta{
   566  			Labels:       desiredLabels,
   567  			Annotations:  desiredAnnotations,
   568  			GenerateName: prefix,
   569  			Finalizers:   desiredFinalizers,
   570  		},
   571  	}
   572  	if controllerRef != nil {
   573  		pod.OwnerReferences = append(pod.OwnerReferences, *controllerRef)
   574  	}
   575  	pod.Spec = *template.Spec.DeepCopy()
   576  	return pod, nil
   577  }
   578  
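        // Editor's sketch ("rs" and "controllerRef" are the same assumed values as
        // in the CreatePods example above): the returned pod carries the template's
        // labels, annotations, finalizers and spec, plus a GenerateName derived from
        // the parent's name.
        //
        //	pod, err := GetPodFromTemplate(&rs.Spec.Template, rs, controllerRef)
        //	// pod.GenerateName == rs.Name + "-" (unless that prefix is not a valid pod name)
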
   579  func (r RealPodControl) createPods(ctx context.Context, namespace string, pod *v1.Pod, object runtime.Object) error {
   580  	if len(labels.Set(pod.Labels)) == 0 {
   581  		return fmt.Errorf("unable to create pods, no labels")
   582  	}
   583  	newPod, err := r.KubeClient.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
   584  	if err != nil {
   585  		// only send an event if the namespace isn't terminating
   586  		if !apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
   587  			r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreatePodReason, "Error creating: %v", err)
   588  		}
   589  		return err
   590  	}
   591  	logger := klog.FromContext(ctx)
   592  	accessor, err := meta.Accessor(object)
   593  	if err != nil {
   594  		logger.Error(err, "parentObject does not have ObjectMeta")
   595  		return nil
   596  	}
   597  	logger.V(4).Info("Controller created pod", "controller", accessor.GetName(), "pod", klog.KObj(newPod))
   598  	r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulCreatePodReason, "Created pod: %v", newPod.Name)
   599  
   600  	return nil
   601  }
   602  
   603  func (r RealPodControl) DeletePod(ctx context.Context, namespace string, podID string, object runtime.Object) error {
   604  	accessor, err := meta.Accessor(object)
   605  	if err != nil {
   606  		return fmt.Errorf("object does not have ObjectMeta, %v", err)
   607  	}
   608  	logger := klog.FromContext(ctx)
   609  	logger.V(2).Info("Deleting pod", "controller", accessor.GetName(), "pod", klog.KRef(namespace, podID))
   610  	if err := r.KubeClient.CoreV1().Pods(namespace).Delete(ctx, podID, metav1.DeleteOptions{}); err != nil {
   611  		if apierrors.IsNotFound(err) {
   612  			logger.V(4).Info("Pod has already been deleted.", "pod", klog.KRef(namespace, podID))
   613  			return err
   614  		}
   615  		r.Recorder.Eventf(object, v1.EventTypeWarning, FailedDeletePodReason, "Error deleting: %v", err)
   616  		return fmt.Errorf("unable to delete pods: %v", err)
   617  	}
   618  	r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulDeletePodReason, "Deleted pod: %v", podID)
   619  
   620  	return nil
   621  }
   622  
   623  type FakePodControl struct {
   624  	sync.Mutex
   625  	Templates       []v1.PodTemplateSpec
   626  	ControllerRefs  []metav1.OwnerReference
   627  	DeletePodName   []string
   628  	Patches         [][]byte
   629  	Err             error
   630  	CreateLimit     int
   631  	CreateCallCount int
   632  }
   633  
   634  var _ PodControlInterface = &FakePodControl{}
   635  
   636  func (f *FakePodControl) PatchPod(ctx context.Context, namespace, name string, data []byte) error {
   637  	f.Lock()
   638  	defer f.Unlock()
   639  	f.Patches = append(f.Patches, data)
   640  	if f.Err != nil {
   641  		return f.Err
   642  	}
   643  	return nil
   644  }
   645  
   646  func (f *FakePodControl) CreatePods(ctx context.Context, namespace string, spec *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error {
   647  	return f.CreatePodsWithGenerateName(ctx, namespace, spec, object, controllerRef, "")
   648  }
   649  
   650  func (f *FakePodControl) CreatePodsWithGenerateName(ctx context.Context, namespace string, spec *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference, generateNamePrefix string) error {
   651  	f.Lock()
   652  	defer f.Unlock()
   653  	f.CreateCallCount++
   654  	if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit {
   655  		return fmt.Errorf("not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount)
   656  	}
   657  	spec.GenerateName = generateNamePrefix
   658  	f.Templates = append(f.Templates, *spec)
   659  	f.ControllerRefs = append(f.ControllerRefs, *controllerRef)
   660  	if f.Err != nil {
   661  		return f.Err
   662  	}
   663  	return nil
   664  }
   665  
   666  func (f *FakePodControl) DeletePod(ctx context.Context, namespace string, podID string, object runtime.Object) error {
   667  	f.Lock()
   668  	defer f.Unlock()
   669  	f.DeletePodName = append(f.DeletePodName, podID)
   670  	if f.Err != nil {
   671  		return f.Err
   672  	}
   673  	return nil
   674  }
   675  
   676  func (f *FakePodControl) Clear() {
   677  	f.Lock()
   678  	defer f.Unlock()
   679  	f.DeletePodName = []string{}
   680  	f.Templates = []v1.PodTemplateSpec{}
   681  	f.ControllerRefs = []metav1.OwnerReference{}
   682  	f.Patches = [][]byte{}
   683  	f.CreateLimit = 0
   684  	f.CreateCallCount = 0
   685  }
   686  
   687  // ByLogging allows custom sorting of pods so the best one can be picked for getting its logs.
   688  type ByLogging []*v1.Pod
   689  
   690  func (s ByLogging) Len() int      { return len(s) }
   691  func (s ByLogging) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
   692  
   693  func (s ByLogging) Less(i, j int) bool {
   694  	// 1. assigned < unassigned
   695  	if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) {
   696  		return len(s[i].Spec.NodeName) > 0
   697  	}
   698  	// 2. PodRunning < PodUnknown < PodPending
   699  	if s[i].Status.Phase != s[j].Status.Phase {
   700  		return podPhaseToOrdinal[s[i].Status.Phase] > podPhaseToOrdinal[s[j].Status.Phase]
   701  	}
   702  	// 3. ready < not ready
   703  	if podutil.IsPodReady(s[i]) != podutil.IsPodReady(s[j]) {
   704  		return podutil.IsPodReady(s[i])
   705  	}
   706  	// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
   707  	//       see https://github.com/kubernetes/kubernetes/issues/22065
   708  	// 4. Been ready for more time < less time < empty time
   709  	if podutil.IsPodReady(s[i]) && podutil.IsPodReady(s[j]) {
   710  		readyTime1 := podReadyTime(s[i])
   711  		readyTime2 := podReadyTime(s[j])
   712  		if !readyTime1.Equal(readyTime2) {
   713  			return afterOrZero(readyTime2, readyTime1)
   714  		}
   715  	}
   716  	// 5. Pods with containers with higher restart counts < lower restart counts
   717  	if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) {
   718  		return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j])
   719  	}
   720  	// 6. older pods < newer pods < empty timestamp pods
   721  	if !s[i].CreationTimestamp.Equal(&s[j].CreationTimestamp) {
   722  		return afterOrZero(&s[j].CreationTimestamp, &s[i].CreationTimestamp)
   723  	}
   724  	return false
   725  }
   726  
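        // Editor's sketch of the intended usage ("pods" is an assumed []*v1.Pod;
        // the sort package is imported by the caller):
        //
        //	sort.Sort(ByLogging(pods))
        //	best := pods[0] // the pod most likely to have useful logs
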
   727  // ActivePods type allows custom sorting of pods so a controller can pick the best ones to delete.
   728  type ActivePods []*v1.Pod
   729  
   730  func (s ActivePods) Len() int      { return len(s) }
   731  func (s ActivePods) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
   732  
   733  func (s ActivePods) Less(i, j int) bool {
   734  	// 1. Unassigned < assigned
   735  	// If only one of the pods is unassigned, the unassigned one is smaller
   736  	if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) {
   737  		return len(s[i].Spec.NodeName) == 0
   738  	}
   739  	// 2. PodPending < PodUnknown < PodRunning
   740  	if podPhaseToOrdinal[s[i].Status.Phase] != podPhaseToOrdinal[s[j].Status.Phase] {
   741  		return podPhaseToOrdinal[s[i].Status.Phase] < podPhaseToOrdinal[s[j].Status.Phase]
   742  	}
   743  	// 3. Not ready < ready
   744  	// If only one of the pods is not ready, the not ready one is smaller
   745  	if podutil.IsPodReady(s[i]) != podutil.IsPodReady(s[j]) {
   746  		return !podutil.IsPodReady(s[i])
   747  	}
   748  	// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
   749  	//       see https://github.com/kubernetes/kubernetes/issues/22065
   750  	// 4. Been ready for empty time < less time < more time
   751  	// If both pods are ready, the latest ready one is smaller
   752  	if podutil.IsPodReady(s[i]) && podutil.IsPodReady(s[j]) {
   753  		readyTime1 := podReadyTime(s[i])
   754  		readyTime2 := podReadyTime(s[j])
   755  		if !readyTime1.Equal(readyTime2) {
   756  			return afterOrZero(readyTime1, readyTime2)
   757  		}
   758  	}
   759  	// 5. Pods with containers with higher restart counts < lower restart counts
   760  	if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) {
   761  		return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j])
   762  	}
   763  	// 6. Empty creation time pods < newer pods < older pods
   764  	if !s[i].CreationTimestamp.Equal(&s[j].CreationTimestamp) {
   765  		return afterOrZero(&s[i].CreationTimestamp, &s[j].CreationTimestamp)
   766  	}
   767  	return false
   768  }
   769  
   770  // ActivePodsWithRanks is a sortable list of pods and a list of corresponding
   771  // ranks which will be considered during sorting.  The two lists must have equal
   772  // length.  After sorting, the pods will be ordered as follows, applying each
   773  // rule in turn until one matches:
   774  //
   775  //  1. If only one of the pods is assigned to a node, the pod that is not
   776  //     assigned comes before the pod that is.
   777  //  2. If the pods' phases differ, a pending pod comes before a pod whose phase
   778  //     is unknown, and a pod whose phase is unknown comes before a running pod.
   779  //  3. If exactly one of the pods is ready, the pod that is not ready comes
   780  //     before the ready pod.
   781  //  4. If controller.kubernetes.io/pod-deletion-cost annotation is set, then
   782  //     the pod with the lower value will come first.
   783  //  5. If the pods' ranks differ, the pod with greater rank comes before the pod
   784  //     with lower rank.
   785  //  6. If both pods are ready but have not been ready for the same amount of
   786  //     time, the pod that has been ready for a shorter amount of time comes
   787  //     before the pod that has been ready for longer.
   788  //  7. If one pod has a container that has restarted more than any container in
   789  //     the other pod, the pod with the container with more restarts comes
   790  //     before the other pod.
   791  //  8. If the pods' creation times differ, the pod that was created more recently
   792  //     comes before the older pod.
   793  //
   794  // In 6 and 8, times are compared in a logarithmic scale. This allows a level
   795  // of randomness among equivalent Pods when sorting. If two pods have the same
   796  // logarithmic rank, they are sorted by UID to provide a pseudorandom order.
   797  //
   798  // If none of these rules matches, the second pod comes before the first pod.
   799  //
   800  // The intention of this ordering is to put pods that should be preferred for
   801  // deletion first in the list.
   802  type ActivePodsWithRanks struct {
   803  	// Pods is a list of pods.
   804  	Pods []*v1.Pod
   805  
   806  	// Rank is a ranking of pods.  This ranking is used during sorting when
   807  	// comparing two pods that are both scheduled, in the same phase, and
   808  	// having the same ready status.
   809  	Rank []int
   810  
   811  	// Now is a reference timestamp for doing logarithmic timestamp comparisons.
   812  	// If zero, comparison happens without scaling.
   813  	Now metav1.Time
   814  }
   815  
   816  func (s ActivePodsWithRanks) Len() int {
   817  	return len(s.Pods)
   818  }
   819  
   820  func (s ActivePodsWithRanks) Swap(i, j int) {
   821  	s.Pods[i], s.Pods[j] = s.Pods[j], s.Pods[i]
   822  	s.Rank[i], s.Rank[j] = s.Rank[j], s.Rank[i]
   823  }
   824  
   825  // Less compares two pods with corresponding ranks and returns true if the first
   826  // one should be preferred for deletion.
   827  func (s ActivePodsWithRanks) Less(i, j int) bool {
   828  	// 1. Unassigned < assigned
   829  	// If only one of the pods is unassigned, the unassigned one is smaller
   830  	if s.Pods[i].Spec.NodeName != s.Pods[j].Spec.NodeName && (len(s.Pods[i].Spec.NodeName) == 0 || len(s.Pods[j].Spec.NodeName) == 0) {
   831  		return len(s.Pods[i].Spec.NodeName) == 0
   832  	}
   833  	// 2. PodPending < PodUnknown < PodRunning
   834  	if podPhaseToOrdinal[s.Pods[i].Status.Phase] != podPhaseToOrdinal[s.Pods[j].Status.Phase] {
   835  		return podPhaseToOrdinal[s.Pods[i].Status.Phase] < podPhaseToOrdinal[s.Pods[j].Status.Phase]
   836  	}
   837  	// 3. Not ready < ready
   838  	// If only one of the pods is not ready, the not ready one is smaller
   839  	if podutil.IsPodReady(s.Pods[i]) != podutil.IsPodReady(s.Pods[j]) {
   840  		return !podutil.IsPodReady(s.Pods[i])
   841  	}
   842  
   843  	// 4. lower pod-deletion-cost < higher pod-deletion-cost
   844  	if utilfeature.DefaultFeatureGate.Enabled(features.PodDeletionCost) {
   845  		pi, _ := helper.GetDeletionCostFromPodAnnotations(s.Pods[i].Annotations)
   846  		pj, _ := helper.GetDeletionCostFromPodAnnotations(s.Pods[j].Annotations)
   847  		if pi != pj {
   848  			return pi < pj
   849  		}
   850  	}
   851  
   852  	// 5. Doubled up < not doubled up
   853  	// If one of the two pods is on the same node as one or more additional
   854  	// ready pods that belong to the same replicaset, whichever pod has more
   855  	// colocated ready pods is less
   856  	if s.Rank[i] != s.Rank[j] {
   857  		return s.Rank[i] > s.Rank[j]
   858  	}
   859  	// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
   860  	//       see https://github.com/kubernetes/kubernetes/issues/22065
   861  	// 6. Been ready for empty time < less time < more time
   862  	// If both pods are ready, the latest ready one is smaller
   863  	if podutil.IsPodReady(s.Pods[i]) && podutil.IsPodReady(s.Pods[j]) {
   864  		readyTime1 := podReadyTime(s.Pods[i])
   865  		readyTime2 := podReadyTime(s.Pods[j])
   866  		if !readyTime1.Equal(readyTime2) {
   867  			if !utilfeature.DefaultFeatureGate.Enabled(features.LogarithmicScaleDown) {
   868  				return afterOrZero(readyTime1, readyTime2)
   869  			} else {
   870  				if s.Now.IsZero() || readyTime1.IsZero() || readyTime2.IsZero() {
   871  					return afterOrZero(readyTime1, readyTime2)
   872  				}
   873  				rankDiff := logarithmicRankDiff(*readyTime1, *readyTime2, s.Now)
   874  				if rankDiff == 0 {
   875  					return s.Pods[i].UID < s.Pods[j].UID
   876  				}
   877  				return rankDiff < 0
   878  			}
   879  		}
   880  	}
   881  	// 7. Pods with containers with higher restart counts < lower restart counts
   882  	if maxContainerRestarts(s.Pods[i]) != maxContainerRestarts(s.Pods[j]) {
   883  		return maxContainerRestarts(s.Pods[i]) > maxContainerRestarts(s.Pods[j])
   884  	}
   885  	// 8. Empty creation time pods < newer pods < older pods
   886  	if !s.Pods[i].CreationTimestamp.Equal(&s.Pods[j].CreationTimestamp) {
   887  		if !utilfeature.DefaultFeatureGate.Enabled(features.LogarithmicScaleDown) {
   888  			return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
   889  		} else {
   890  			if s.Now.IsZero() || s.Pods[i].CreationTimestamp.IsZero() || s.Pods[j].CreationTimestamp.IsZero() {
   891  				return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
   892  			}
   893  			rankDiff := logarithmicRankDiff(s.Pods[i].CreationTimestamp, s.Pods[j].CreationTimestamp, s.Now)
   894  			if rankDiff == 0 {
   895  				return s.Pods[i].UID < s.Pods[j].UID
   896  			}
   897  			return rankDiff < 0
   898  		}
   899  	}
   900  	return false
   901  }
   902  
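        // Editor's sketch of the intended usage ("pods" and "ranks" are assumed;
        // ranks[i] is typically the number of colocated ready pods for pods[i]):
        //
        //	sort.Sort(ActivePodsWithRanks{Pods: pods, Rank: ranks, Now: metav1.Now()})
        //	// pods[:n] now holds the n pods preferred for deletion
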
   903  // afterOrZero checks if time t1 is after time t2; if one of them
   904  // is zero, the zero time is seen as after non-zero time.
   905  func afterOrZero(t1, t2 *metav1.Time) bool {
   906  	if t1.Time.IsZero() || t2.Time.IsZero() {
   907  		return t1.Time.IsZero()
   908  	}
   909  	return t1.After(t2.Time)
   910  }
   911  
   912  // logarithmicRankDiff returns the difference between the base-2 logarithmic
   913  // ranks of two timestamps, measured relative to the current timestamp.
   914  func logarithmicRankDiff(t1, t2, now metav1.Time) int64 {
   915  	d1 := now.Sub(t1.Time)
   916  	d2 := now.Sub(t2.Time)
   917  	r1 := int64(-1)
   918  	r2 := int64(-1)
   919  	if d1 > 0 {
   920  		r1 = int64(math.Log2(float64(d1)))
   921  	}
   922  	if d2 > 0 {
   923  		r2 = int64(math.Log2(float64(d2)))
   924  	}
   925  	return r1 - r2
   926  }
   927  
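        // A worked example for logarithmicRankDiff (durations are in nanoseconds):
        // for pods that became ready 1h and 2h before now, d1 ≈ 3.6e12 and
        // d2 ≈ 7.2e12, so r1 = 41 and r2 = 42 and the diff is -1. Timestamps whose
        // floor(log2) buckets coincide yield 0, and the caller falls back to
        // comparing pod UIDs.
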
   928  func podReadyTime(pod *v1.Pod) *metav1.Time {
   929  	if podutil.IsPodReady(pod) {
   930  		for _, c := range pod.Status.Conditions {
   931  			// we only care about pod ready conditions
   932  			if c.Type == v1.PodReady && c.Status == v1.ConditionTrue {
   933  				return &c.LastTransitionTime
   934  			}
   935  		}
   936  	}
   937  	return &metav1.Time{}
   938  }
   939  
   940  func maxContainerRestarts(pod *v1.Pod) int {
   941  	maxRestarts := 0
   942  	for _, c := range pod.Status.ContainerStatuses {
   943  		maxRestarts = integer.IntMax(maxRestarts, int(c.RestartCount))
   944  	}
   945  	return maxRestarts
   946  }
   947  
   948  // FilterActivePods returns pods that have not terminated.
   949  func FilterActivePods(logger klog.Logger, pods []*v1.Pod) []*v1.Pod {
   950  	var result []*v1.Pod
   951  	for _, p := range pods {
   952  		if IsPodActive(p) {
   953  			result = append(result, p)
   954  		} else {
   955  			logger.V(4).Info("Ignoring inactive pod", "pod", klog.KObj(p), "phase", p.Status.Phase, "deletionTime", p.DeletionTimestamp)
   956  		}
   957  	}
   958  	return result
   959  }
   960  
        // FilterTerminatingPods returns pods that are in the process of terminating.
   961  func FilterTerminatingPods(pods []*v1.Pod) []*v1.Pod {
   962  	var result []*v1.Pod
   963  	for _, p := range pods {
   964  		if IsPodTerminating(p) {
   965  			result = append(result, p)
   966  		}
   967  	}
   968  	return result
   969  }
   970  
        // CountTerminatingPods returns the number of pods that are in the process of terminating.
   971  func CountTerminatingPods(pods []*v1.Pod) int32 {
   972  	numberOfTerminatingPods := 0
   973  	for _, p := range pods {
   974  		if IsPodTerminating(p) {
   975  			numberOfTerminatingPods += 1
   976  		}
   977  	}
   978  	return int32(numberOfTerminatingPods)
   979  }
   980  
        // IsPodActive returns true if the pod has neither succeeded nor failed and is not marked for deletion.
   981  func IsPodActive(p *v1.Pod) bool {
   982  	return v1.PodSucceeded != p.Status.Phase &&
   983  		v1.PodFailed != p.Status.Phase &&
   984  		p.DeletionTimestamp == nil
   985  }
   986  
        // IsPodTerminating returns true if the pod is marked for deletion but has not yet reached a terminal phase.
   987  func IsPodTerminating(p *v1.Pod) bool {
   988  	return !podutil.IsPodTerminal(p) &&
   989  		p.DeletionTimestamp != nil
   990  }
   991  
   992  // FilterActiveReplicaSets returns replica sets that have (or at least ought to have) pods.
   993  func FilterActiveReplicaSets(replicaSets []*apps.ReplicaSet) []*apps.ReplicaSet {
   994  	activeFilter := func(rs *apps.ReplicaSet) bool {
   995  		return rs != nil && *(rs.Spec.Replicas) > 0
   996  	}
   997  	return FilterReplicaSets(replicaSets, activeFilter)
   998  }
   999  
  1000  type filterRS func(rs *apps.ReplicaSet) bool
  1001  
  1002  // FilterReplicaSets returns replica sets that are filtered by filterFn (all returned ones should match filterFn).
  1003  func FilterReplicaSets(RSes []*apps.ReplicaSet, filterFn filterRS) []*apps.ReplicaSet {
  1004  	var filtered []*apps.ReplicaSet
  1005  	for i := range RSes {
  1006  		if filterFn(RSes[i]) {
  1007  			filtered = append(filtered, RSes[i])
  1008  		}
  1009  	}
  1010  	return filtered
  1011  }
  1012  
  1013  // PodKey returns a key unique to the given pod within a cluster.
  1014  // It's used so we consistently use the same key scheme in this module.
  1015  // It does exactly what cache.MetaNamespaceKeyFunc would have done
  1016  // except there's no possibility of error since we know the exact type.
  1017  func PodKey(pod *v1.Pod) string {
  1018  	return fmt.Sprintf("%v/%v", pod.Namespace, pod.Name)
  1019  }
  1020  
  1021  // ControllersByCreationTimestamp sorts a list of ReplicationControllers by creation timestamp, using their names as a tie breaker.
  1022  type ControllersByCreationTimestamp []*v1.ReplicationController
  1023  
  1024  func (o ControllersByCreationTimestamp) Len() int      { return len(o) }
  1025  func (o ControllersByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
  1026  func (o ControllersByCreationTimestamp) Less(i, j int) bool {
  1027  	if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) {
  1028  		return o[i].Name < o[j].Name
  1029  	}
  1030  	return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
  1031  }
  1032  
  1033  // ReplicaSetsByCreationTimestamp sorts a list of ReplicaSets by creation timestamp, using their names as a tie breaker.
  1034  type ReplicaSetsByCreationTimestamp []*apps.ReplicaSet
  1035  
  1036  func (o ReplicaSetsByCreationTimestamp) Len() int      { return len(o) }
  1037  func (o ReplicaSetsByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
  1038  func (o ReplicaSetsByCreationTimestamp) Less(i, j int) bool {
  1039  	if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) {
  1040  		return o[i].Name < o[j].Name
  1041  	}
  1042  	return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
  1043  }
  1044  
  1045  // ReplicaSetsBySizeOlder sorts a list of ReplicaSets by size in descending order, using their creation timestamp or name as a tie breaker.
  1046  // By using the creation timestamp, this sorts from old to new replica sets.
  1047  type ReplicaSetsBySizeOlder []*apps.ReplicaSet
  1048  
  1049  func (o ReplicaSetsBySizeOlder) Len() int      { return len(o) }
  1050  func (o ReplicaSetsBySizeOlder) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
  1051  func (o ReplicaSetsBySizeOlder) Less(i, j int) bool {
  1052  	if *(o[i].Spec.Replicas) == *(o[j].Spec.Replicas) {
  1053  		return ReplicaSetsByCreationTimestamp(o).Less(i, j)
  1054  	}
  1055  	return *(o[i].Spec.Replicas) > *(o[j].Spec.Replicas)
  1056  }
  1057  
  1058  // ReplicaSetsBySizeNewer sorts a list of ReplicaSets by size in descending order, using their creation timestamp or name as a tie breaker.
  1059  // By using the creation timestamp, this sorts from new to old replica sets.
  1060  type ReplicaSetsBySizeNewer []*apps.ReplicaSet
  1061  
  1062  func (o ReplicaSetsBySizeNewer) Len() int      { return len(o) }
  1063  func (o ReplicaSetsBySizeNewer) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
  1064  func (o ReplicaSetsBySizeNewer) Less(i, j int) bool {
  1065  	if *(o[i].Spec.Replicas) == *(o[j].Spec.Replicas) {
  1066  		return ReplicaSetsByCreationTimestamp(o).Less(j, i)
  1067  	}
  1068  	return *(o[i].Spec.Replicas) > *(o[j].Spec.Replicas)
  1069  }
  1070  
  1071  // AddOrUpdateTaintOnNode adds taints to the node. If any taint was added or updated, it issues an API call
  1072  // to update the node; otherwise, it makes no API calls. Returns an error if any call fails.
  1073  func AddOrUpdateTaintOnNode(ctx context.Context, c clientset.Interface, nodeName string, taints ...*v1.Taint) error {
  1074  	if len(taints) == 0 {
  1075  		return nil
  1076  	}
  1077  	firstTry := true
  1078  	return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error {
  1079  		var err error
  1080  		var oldNode *v1.Node
  1081  		// First we try getting the node from the API server cache, as it's cheaper. If that fails,
  1082  		// we get it from etcd to be sure to have fresh data.
  1083  		option := metav1.GetOptions{}
  1084  		if firstTry {
  1085  			option.ResourceVersion = "0"
  1086  			firstTry = false
  1087  		}
  1088  		oldNode, err = c.CoreV1().Nodes().Get(ctx, nodeName, option)
  1089  		if err != nil {
  1090  			return err
  1091  		}
  1092  
  1093  		var newNode *v1.Node
  1094  		oldNodeCopy := oldNode
  1095  		updated := false
  1096  		for _, taint := range taints {
  1097  			curNewNode, ok, err := taintutils.AddOrUpdateTaint(oldNodeCopy, taint)
  1098  			if err != nil {
  1099  				return fmt.Errorf("failed to update taint of node: %v", err)
  1100  			}
  1101  			updated = updated || ok
  1102  			newNode = curNewNode
  1103  			oldNodeCopy = curNewNode
  1104  		}
  1105  		if !updated {
  1106  			return nil
  1107  		}
  1108  		return PatchNodeTaints(ctx, c, nodeName, oldNode, newNode)
  1109  	})
  1110  }
  1111  
  1112  // RemoveTaintOffNode is for cleaning up taints temporarily added to a node;
  1113  // it won't fail if the target taint doesn't exist or has already been removed.
  1114  // If passed a node, it first checks whether there's anything to be done; if the taint
  1115  // is not present, it won't issue any API calls.
  1116  func RemoveTaintOffNode(ctx context.Context, c clientset.Interface, nodeName string, node *v1.Node, taints ...*v1.Taint) error {
  1117  	if len(taints) == 0 {
  1118  		return nil
  1119  	}
  1120  	// Short circuit to limit the number of API calls.
  1121  	if node != nil {
  1122  		match := false
  1123  		for _, taint := range taints {
  1124  			if taintutils.TaintExists(node.Spec.Taints, taint) {
  1125  				match = true
  1126  				break
  1127  			}
  1128  		}
  1129  		if !match {
  1130  			return nil
  1131  		}
  1132  	}
  1133  
  1134  	firstTry := true
  1135  	return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error {
  1136  		var err error
  1137  		var oldNode *v1.Node
  1138  		// First we try getting the node from the API server cache, as it's cheaper. If that fails,
  1139  		// we get it from etcd to be sure to have fresh data.
  1140  		option := metav1.GetOptions{}
  1141  		if firstTry {
  1142  			option.ResourceVersion = "0"
  1143  			firstTry = false
  1144  		}
  1145  		oldNode, err = c.CoreV1().Nodes().Get(ctx, nodeName, option)
  1146  		if err != nil {
  1147  			return err
  1148  		}
  1149  
  1150  		var newNode *v1.Node
  1151  		oldNodeCopy := oldNode
  1152  		updated := false
  1153  		for _, taint := range taints {
  1154  			curNewNode, ok, err := taintutils.RemoveTaint(oldNodeCopy, taint)
  1155  			if err != nil {
  1156  				return fmt.Errorf("failed to remove taint of node: %v", err)
  1157  			}
  1158  			updated = updated || ok
  1159  			newNode = curNewNode
  1160  			oldNodeCopy = curNewNode
  1161  		}
  1162  		if !updated {
  1163  			return nil
  1164  		}
  1165  		return PatchNodeTaints(ctx, c, nodeName, oldNode, newNode)
  1166  	})
  1167  }
  1168  
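        // Editor's sketch of the intended usage (the client, node name, and taint
        // key are assumed placeholders, not part of this file):
        //
        //	taint := &v1.Taint{Key: "example.com/maintenance", Effect: v1.TaintEffectNoSchedule}
        //	if err := AddOrUpdateTaintOnNode(ctx, client, "node-1", taint); err != nil {
        //		// handle the error
        //	}
        //	// Passing a nil node forces RemoveTaintOffNode to fetch the node itself:
        //	if err := RemoveTaintOffNode(ctx, client, "node-1", nil, taint); err != nil {
        //		// handle the error
        //	}
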
  1169  // PatchNodeTaints patches node's taints.
  1170  func PatchNodeTaints(ctx context.Context, c clientset.Interface, nodeName string, oldNode *v1.Node, newNode *v1.Node) error {
  1171  	// Strip the resource version from the base ("old") node so that the generated patch includes the RV and the Patch request checks for conflicts over .spec.taints.
  1172  	// This is needed because .spec.taints does not specify patchMergeKey and patchStrategy, and adding them is no longer an option for compatibility reasons.
  1173  	// Using another patch strategy works for adding new taints, but it does not solve the problem of taint removal.
  1174  	oldNodeNoRV := oldNode.DeepCopy()
  1175  	oldNodeNoRV.ResourceVersion = ""
  1176  	oldDataNoRV, err := json.Marshal(&oldNodeNoRV)
  1177  	if err != nil {
  1178  		return fmt.Errorf("failed to marshal old node %#v for node %q: %v", oldNodeNoRV, nodeName, err)
  1179  	}
  1180  
  1181  	newTaints := newNode.Spec.Taints
  1182  	newNodeClone := oldNode.DeepCopy()
  1183  	newNodeClone.Spec.Taints = newTaints
  1184  	newData, err := json.Marshal(newNodeClone)
  1185  	if err != nil {
  1186  		return fmt.Errorf("failed to marshal new node %#v for node %q: %v", newNodeClone, nodeName, err)
  1187  	}
  1188  
  1189  	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldDataNoRV, newData, v1.Node{})
  1190  	if err != nil {
  1191  		return fmt.Errorf("failed to create patch for node %q: %v", nodeName, err)
  1192  	}
  1193  
  1194  	_, err = c.CoreV1().Nodes().Patch(ctx, nodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{})
  1195  	return err
  1196  }
  1197  
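        // Because the old node is marshalled without its resourceVersion while the
        // new one keeps it, the generated two-way merge patch carries the RV, so the
        // server performs an optimistic-concurrency check when applying it. And since
        // .spec.taints has no patchMergeKey, the list is replaced wholesale. The patch
        // is roughly of this shape (editor's illustration, not an exact wire format):
        //
        //	{"metadata":{"resourceVersion":"<old RV>"},"spec":{"taints":[ /* full new list */ ]}}
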
  1198  // ComputeHash returns a hash value calculated from the pod template and
  1199  // a collisionCount to avoid hash collisions. The hash is safe-encoded to
  1200  // avoid bad words.
  1201  func ComputeHash(template *v1.PodTemplateSpec, collisionCount *int32) string {
  1202  	podTemplateSpecHasher := fnv.New32a()
  1203  	hashutil.DeepHashObject(podTemplateSpecHasher, *template)
  1204  
  1205  	// Add collisionCount in the hash if it exists.
  1206  	if collisionCount != nil {
  1207  		collisionCountBytes := make([]byte, 8)
  1208  		binary.LittleEndian.PutUint32(collisionCountBytes, uint32(*collisionCount))
  1209  		podTemplateSpecHasher.Write(collisionCountBytes)
  1210  	}
  1211  
  1212  	return rand.SafeEncodeString(fmt.Sprint(podTemplateSpecHasher.Sum32()))
  1213  }
  1214  
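        // Editor's sketch of a typical use, mirroring how the deployment controller
        // derives pod-template-hash values ("d" is an assumed *apps.Deployment; the
        // hash shown is illustrative):
        //
        //	hash := ComputeHash(&d.Spec.Template, d.Status.CollisionCount)
        //	rsName := d.Name + "-" + hash // e.g. "web-5d4b8f7c9"
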
        // AddOrUpdateLabelsOnNode updates the labels on the given node with retries, merging labelsToUpdate into the node's existing labels.
  1215  func AddOrUpdateLabelsOnNode(kubeClient clientset.Interface, nodeName string, labelsToUpdate map[string]string) error {
  1216  	firstTry := true
  1217  	return clientretry.RetryOnConflict(UpdateLabelBackoff, func() error {
  1218  		var err error
  1219  		var node *v1.Node
  1220  		// First we try getting the node from the API server cache, as it's cheaper. If that fails,
  1221  		// we get it from etcd to be sure to have fresh data.
  1222  		option := metav1.GetOptions{}
  1223  		if firstTry {
  1224  			option.ResourceVersion = "0"
  1225  			firstTry = false
  1226  		}
  1227  		node, err = kubeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, option)
  1228  		if err != nil {
  1229  			return err
  1230  		}
  1231  
  1232  		// Make a copy of the node and update the labels.
  1233  		newNode := node.DeepCopy()
  1234  		if newNode.Labels == nil {
  1235  			newNode.Labels = make(map[string]string)
  1236  		}
  1237  		for key, value := range labelsToUpdate {
  1238  			newNode.Labels[key] = value
  1239  		}
  1240  
  1241  		oldData, err := json.Marshal(node)
  1242  		if err != nil {
  1243  			return fmt.Errorf("failed to marshal the existing node %#v: %v", node, err)
  1244  		}
  1245  		newData, err := json.Marshal(newNode)
  1246  		if err != nil {
  1247  			return fmt.Errorf("failed to marshal the new node %#v: %v", newNode, err)
  1248  		}
  1249  		patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Node{})
  1250  		if err != nil {
  1251  			return fmt.Errorf("failed to create a two-way merge patch: %v", err)
  1252  		}
  1253  		if _, err := kubeClient.CoreV1().Nodes().Patch(context.TODO(), node.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}); err != nil {
  1254  			return fmt.Errorf("failed to patch the node: %v", err)
  1255  		}
  1256  		return nil
  1257  	})
  1258  }
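
        // Editor's sketch of the intended usage (the client, node name, and label
        // value are assumed placeholders):
        //
        //	err := AddOrUpdateLabelsOnNode(client, "node-1", map[string]string{
        //		"topology.kubernetes.io/zone": "us-east-1a",
        //	})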