k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/controller_utils.go

/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"context"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"hash/fnv"
	"math"
	"sync"
	"sync/atomic"
	"time"

	apps "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/rand"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/strategicpatch"
	"k8s.io/apimachinery/pkg/util/wait"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	clientretry "k8s.io/client-go/util/retry"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/pkg/apis/core/helper"
	_ "k8s.io/kubernetes/pkg/apis/core/install"
	"k8s.io/kubernetes/pkg/apis/core/validation"
	"k8s.io/kubernetes/pkg/features"
	hashutil "k8s.io/kubernetes/pkg/util/hash"
	taintutils "k8s.io/kubernetes/pkg/util/taints"
	"k8s.io/utils/clock"

	"k8s.io/klog/v2"
)

const (
	// If a watch drops a delete event for a pod, it'll take this long
	// before a dormant controller waiting for those packets is woken up anyway. It is
	// specifically targeted at the case where some problem prevents an update
	// of expectations, without it the controller could stay asleep forever. This should
	// be set based on the expected latency of watch events.
	//
	// Currently a controller can service (create *and* observe the watch events for said
	// creation) about 10 pods a second, so it takes about 1 min to service
	// 500 pods. Just creation is limited to 20qps, and watching happens with ~10-30s
	// latency/pod at the scale of 3000 pods over 100 nodes.
	ExpectationsTimeout = 5 * time.Minute
	// When batching pod creates, SlowStartInitialBatchSize is the size of the
	// initial batch. The size of each successive batch is twice the size of
	// the previous batch. For example, for a value of 1, batch sizes would be
	// 1, 2, 4, 8, ... and for a value of 10, batch sizes would be
	// 10, 20, 40, 80, ... Setting the value higher means that quota denials
	// will result in more doomed API calls and associated event spam. Setting
	// the value lower will result in more API call round trip periods for
	// large batches.
	//
	// Given a number of pods to start "N":
	// The number of doomed calls per sync once quota is exceeded is given by:
	//      min(N,SlowStartInitialBatchSize)
	// The number of batches is given by:
	//      1+floor(log_2(ceil(N/SlowStartInitialBatchSize)))
	SlowStartInitialBatchSize = 1
)
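// slowStartBatchSizesSketch is an illustrative sketch, not part of the original file: it only
// enumerates the batch sizes implied by SlowStartInitialBatchSize for N pods, matching the
// formulas above (the real slow-start batching lives in the individual controllers). For N=13
// and an initial batch size of 1 it yields 1, 2, 4, 6, i.e. 1+floor(log_2(ceil(13/1))) = 4 batches.
func slowStartBatchSizesSketch(n int) []int {
	var sizes []int
	for remaining, batch := n, SlowStartInitialBatchSize; remaining > 0; {
		if batch > remaining {
			// The last batch is capped by the number of pods still to create.
			batch = remaining
		}
		sizes = append(sizes, batch)
		remaining -= batch
		batch *= 2
	}
	return sizes
}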
var UpdateTaintBackoff = wait.Backoff{
	Steps:    5,
	Duration: 100 * time.Millisecond,
	Jitter:   1.0,
}

var UpdateLabelBackoff = wait.Backoff{
	Steps:    5,
	Duration: 100 * time.Millisecond,
	Jitter:   1.0,
}

var (
	KeyFunc           = cache.DeletionHandlingMetaNamespaceKeyFunc
	podPhaseToOrdinal = map[v1.PodPhase]int{v1.PodPending: 0, v1.PodUnknown: 1, v1.PodRunning: 2}
)

type ResyncPeriodFunc func() time.Duration

// Returns 0 for resyncPeriod in case resyncing is not needed.
func NoResyncPeriodFunc() time.Duration {
	return 0
}

// StaticResyncPeriodFunc returns the resync period specified
func StaticResyncPeriodFunc(resyncPeriod time.Duration) ResyncPeriodFunc {
	return func() time.Duration {
		return resyncPeriod
	}
}

// Expectations are a way for controllers to tell the controller manager what they expect. eg:
//	ControllerExpectations: {
//		controller1: expects 2 adds in 2 minutes
//		controller2: expects 2 dels in 2 minutes
//		controller3: expects -1 adds in 2 minutes => controller3's expectations have already been met
//	}
//
// Implementation:
//	ControlleeExpectation = pair of atomic counters to track controllee's creation/deletion
//	ControllerExpectationsStore = TTLStore + a ControlleeExpectation per controller
//
// * Once set expectations can only be lowered
// * A controller isn't synced till its expectations are either fulfilled, or expire
// * Controllers that don't set expectations will get woken up for every matching controllee

// ExpKeyFunc to parse out the key from a ControlleeExpectation
var ExpKeyFunc = func(obj interface{}) (string, error) {
	if e, ok := obj.(*ControlleeExpectations); ok {
		return e.key, nil
	}
	return "", fmt.Errorf("could not find key for obj %#v", obj)
}

// ControllerExpectationsInterface is an interface that allows users to set and wait on expectations.
// Only abstracted out for testing.
// Warning: if using KeyFunc it is not safe to use a single ControllerExpectationsInterface with different
// types of controllers, because the keys might conflict across types.
type ControllerExpectationsInterface interface {
	GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error)
	SatisfiedExpectations(logger klog.Logger, controllerKey string) bool
	DeleteExpectations(logger klog.Logger, controllerKey string)
	SetExpectations(logger klog.Logger, controllerKey string, add, del int) error
	ExpectCreations(logger klog.Logger, controllerKey string, adds int) error
	ExpectDeletions(logger klog.Logger, controllerKey string, dels int) error
	CreationObserved(logger klog.Logger, controllerKey string)
	DeletionObserved(logger klog.Logger, controllerKey string)
	RaiseExpectations(logger klog.Logger, controllerKey string, add, del int)
	LowerExpectations(logger klog.Logger, controllerKey string, add, del int)
}

// ControllerExpectations is a cache mapping controllers to what they expect to see before being woken up for a sync.
type ControllerExpectations struct {
	cache.Store
}

// GetExpectations returns the ControlleeExpectations of the given controller.
func (r *ControllerExpectations) GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error) {
	exp, exists, err := r.GetByKey(controllerKey)
	if err == nil && exists {
		return exp.(*ControlleeExpectations), true, nil
	}
	return nil, false, err
}

// DeleteExpectations deletes the expectations of the given controller from the TTLStore.
func (r *ControllerExpectations) DeleteExpectations(logger klog.Logger, controllerKey string) {
	if exp, exists, err := r.GetByKey(controllerKey); err == nil && exists {
		if err := r.Delete(exp); err != nil {
			logger.V(2).Info("Error deleting expectations", "controller", controllerKey, "err", err)
		}
	}
}

// SatisfiedExpectations returns true if the required adds/dels for the given controller have been observed.
// Add/del counts are established by the controller at sync time, and updated as controllees are observed by the controller
// manager.
func (r *ControllerExpectations) SatisfiedExpectations(logger klog.Logger, controllerKey string) bool {
	if exp, exists, err := r.GetExpectations(controllerKey); exists {
		if exp.Fulfilled() {
			logger.V(4).Info("Controller expectations fulfilled", "expectations", exp)
			return true
		} else if exp.isExpired() {
			logger.V(4).Info("Controller expectations expired", "expectations", exp)
			return true
		} else {
			logger.V(4).Info("Controller still waiting on expectations", "expectations", exp)
			return false
		}
	} else if err != nil {
		logger.V(2).Info("Error encountered while checking expectations, forcing sync", "err", err)
	} else {
		// When a new controller is created, it doesn't have expectations.
		// When it doesn't see expected watch events for > TTL, the expectations expire.
		//   - In this case it wakes up, creates/deletes controllees, and sets expectations again.
		// When it has satisfied expectations and no controllees need to be created/destroyed > TTL, the expectations expire.
		//   - In this case it continues without setting expectations till it needs to create/delete controllees.
		logger.V(4).Info("Controller either never recorded expectations, or the ttl expired", "controller", controllerKey)
	}
	// Trigger a sync if we either encountered an error (which shouldn't happen since we're
	// getting from local store) or this controller hasn't established expectations.
	return true
}

// TODO: Extend ExpirationCache to support explicit expiration.
// TODO: Make this possible to disable in tests.
// TODO: Support injection of clock.
func (exp *ControlleeExpectations) isExpired() bool {
	return clock.RealClock{}.Since(exp.timestamp) > ExpectationsTimeout
}

// SetExpectations registers new expectations for the given controller. Forgets existing expectations.
func (r *ControllerExpectations) SetExpectations(logger klog.Logger, controllerKey string, add, del int) error {
	exp := &ControlleeExpectations{add: int64(add), del: int64(del), key: controllerKey, timestamp: clock.RealClock{}.Now()}
	logger.V(4).Info("Setting expectations", "expectations", exp)
	return r.Add(exp)
}

func (r *ControllerExpectations) ExpectCreations(logger klog.Logger, controllerKey string, adds int) error {
	return r.SetExpectations(logger, controllerKey, adds, 0)
}

func (r *ControllerExpectations) ExpectDeletions(logger klog.Logger, controllerKey string, dels int) error {
	return r.SetExpectations(logger, controllerKey, 0, dels)
}

// Decrements the expectation counts of the given controller.
func (r *ControllerExpectations) LowerExpectations(logger klog.Logger, controllerKey string, add, del int) {
	if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
		exp.Add(int64(-add), int64(-del))
		// The expectations might've been modified since the update on the previous line.
		logger.V(4).Info("Lowered expectations", "expectations", exp)
	}
}

// Increments the expectation counts of the given controller.
func (r *ControllerExpectations) RaiseExpectations(logger klog.Logger, controllerKey string, add, del int) {
	if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
		exp.Add(int64(add), int64(del))
		// The expectations might've been modified since the update on the previous line.
		logger.V(4).Info("Raised expectations", "expectations", exp)
	}
}

// CreationObserved atomically decrements the `add` expectation count of the given controller.
func (r *ControllerExpectations) CreationObserved(logger klog.Logger, controllerKey string) {
	r.LowerExpectations(logger, controllerKey, 1, 0)
}

// DeletionObserved atomically decrements the `del` expectation count of the given controller.
func (r *ControllerExpectations) DeletionObserved(logger klog.Logger, controllerKey string) {
	r.LowerExpectations(logger, controllerKey, 0, 1)
}
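// expectationsLifecycleSketch is an illustrative sketch, not part of the original file: the call
// sequence a controller typically follows around a sync. The work queue, informer handlers and
// the actual create calls are assumed and elided.
func expectationsLifecycleSketch(logger klog.Logger, exp ControllerExpectationsInterface, controllerKey string, podsToCreate int) error {
	// Skip managing controllees while a previous round of creates/deletes has not been
	// fully observed (or expired) yet.
	if !exp.SatisfiedExpectations(logger, controllerKey) {
		return nil
	}
	// Record how many creates this sync is about to issue, before issuing them.
	if err := exp.ExpectCreations(logger, controllerKey, podsToCreate); err != nil {
		return err
	}
	// ... issue podsToCreate create calls. The informer's add handler then calls
	// exp.CreationObserved(logger, controllerKey) once per pod it sees, and the controller
	// also lowers expectations for any create call that failed outright, so the next sync
	// is not blocked waiting for pods that will never appear.
	return nil
}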
// ControlleeExpectations track controllee creates/deletes.
type ControlleeExpectations struct {
	// Important: Since these two int64 fields are using sync/atomic, they have to be at the top of the struct due to a bug on 32-bit platforms
	// See: https://golang.org/pkg/sync/atomic/ for more information
	add       int64
	del       int64
	key       string
	timestamp time.Time
}

// Add increments the add and del counters.
func (e *ControlleeExpectations) Add(add, del int64) {
	atomic.AddInt64(&e.add, add)
	atomic.AddInt64(&e.del, del)
}

// Fulfilled returns true if this expectation has been fulfilled.
func (e *ControlleeExpectations) Fulfilled() bool {
	// TODO: think about why this line being atomic doesn't matter
	return atomic.LoadInt64(&e.add) <= 0 && atomic.LoadInt64(&e.del) <= 0
}

// GetExpectations returns the add and del expectations of the controllee.
func (e *ControlleeExpectations) GetExpectations() (int64, int64) {
	return atomic.LoadInt64(&e.add), atomic.LoadInt64(&e.del)
}

// MarshalLog makes a thread-safe copy of the values of the expectations that
// can be used for logging.
func (e *ControlleeExpectations) MarshalLog() interface{} {
	return struct {
		add int64
		del int64
		key string
	}{
		add: atomic.LoadInt64(&e.add),
		del: atomic.LoadInt64(&e.del),
		key: e.key,
	}
}

// NewControllerExpectations returns a store for ControllerExpectations.
func NewControllerExpectations() *ControllerExpectations {
	return &ControllerExpectations{cache.NewStore(ExpKeyFunc)}
}

// UIDSetKeyFunc to parse out the key from a UIDSet.
var UIDSetKeyFunc = func(obj interface{}) (string, error) {
	if u, ok := obj.(*UIDSet); ok {
		return u.key, nil
	}
	return "", fmt.Errorf("could not find key for obj %#v", obj)
}

// UIDSet holds a key and a set of UIDs. Used by the
// UIDTrackingControllerExpectations to remember which UIDs it has seen or is
// still waiting for.
type UIDSet struct {
	sets.String
	key string
}

// UIDTrackingControllerExpectations tracks the UID of the pods it deletes.
// This cache is needed over plain old expectations to safely handle graceful
// deletion. The desired behavior is to treat an update that sets the
// DeletionTimestamp on an object as a delete. To do so consistently, one needs
// to remember the expected deletes so they aren't double counted.
// TODO: Track creates as well (#22599)
type UIDTrackingControllerExpectations struct {
	ControllerExpectationsInterface
	// TODO: There is a much nicer way to do this that involves a single store,
	// a lock per entry, and a ControlleeExpectationsInterface type.
	uidStoreLock sync.Mutex
	// Store used for the UIDs associated with any expectation tracked via the
	// ControllerExpectationsInterface.
	uidStore cache.Store
}

// GetUIDs is a convenience method to avoid exposing the set of expected uids.
// The returned set is not thread safe; all modifications must be made holding
// the uidStoreLock.
func (u *UIDTrackingControllerExpectations) GetUIDs(controllerKey string) sets.String {
	if uid, exists, err := u.uidStore.GetByKey(controllerKey); err == nil && exists {
		return uid.(*UIDSet).String
	}
	return nil
}

// ExpectDeletions records expectations for the given deleteKeys, against the given controller.
func (u *UIDTrackingControllerExpectations) ExpectDeletions(logger klog.Logger, rcKey string, deletedKeys []string) error {
	expectedUIDs := sets.NewString()
	for _, k := range deletedKeys {
		expectedUIDs.Insert(k)
	}
	logger.V(4).Info("Controller waiting on deletions", "controller", rcKey, "keys", deletedKeys)
	u.uidStoreLock.Lock()
	defer u.uidStoreLock.Unlock()

	if existing := u.GetUIDs(rcKey); existing != nil && existing.Len() != 0 {
		logger.Error(nil, "Clobbering existing delete keys", "keys", existing)
	}
	if err := u.uidStore.Add(&UIDSet{expectedUIDs, rcKey}); err != nil {
		return err
	}
	return u.ControllerExpectationsInterface.ExpectDeletions(logger, rcKey, expectedUIDs.Len())
}

// DeletionObserved records the given deleteKey as a deletion, for the given rc.
func (u *UIDTrackingControllerExpectations) DeletionObserved(logger klog.Logger, rcKey, deleteKey string) {
	u.uidStoreLock.Lock()
	defer u.uidStoreLock.Unlock()

	uids := u.GetUIDs(rcKey)
	if uids != nil && uids.Has(deleteKey) {
		logger.V(4).Info("Controller received delete for pod", "controller", rcKey, "key", deleteKey)
		u.ControllerExpectationsInterface.DeletionObserved(logger, rcKey)
		uids.Delete(deleteKey)
	}
}

// DeleteExpectations deletes the UID set and invokes DeleteExpectations on the
// underlying ControllerExpectationsInterface.
func (u *UIDTrackingControllerExpectations) DeleteExpectations(logger klog.Logger, rcKey string) {
	u.uidStoreLock.Lock()
	defer u.uidStoreLock.Unlock()

	u.ControllerExpectationsInterface.DeleteExpectations(logger, rcKey)
	if uidExp, exists, err := u.uidStore.GetByKey(rcKey); err == nil && exists {
		if err := u.uidStore.Delete(uidExp); err != nil {
			logger.V(2).Info("Error deleting uid expectations", "controller", rcKey, "err", err)
		}
	}
}

// NewUIDTrackingControllerExpectations returns a wrapper around
// ControllerExpectations that is aware of deleteKeys.
func NewUIDTrackingControllerExpectations(ce ControllerExpectationsInterface) *UIDTrackingControllerExpectations {
	return &UIDTrackingControllerExpectations{ControllerExpectationsInterface: ce, uidStore: cache.NewStore(UIDSetKeyFunc)}
}
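// uidTrackingDeletionsSketch is an illustrative sketch, not part of the original file: how a
// controller might record graceful deletions keyed by pod, so that an update that merely sets
// the DeletionTimestamp is counted once and only once. Assumes uidExp was built with
// NewUIDTrackingControllerExpectations and rsKey is the controller's cache key.
func uidTrackingDeletionsSketch(logger klog.Logger, uidExp *UIDTrackingControllerExpectations, rsKey string, podsToDelete []*v1.Pod) error {
	keys := make([]string, 0, len(podsToDelete))
	for _, p := range podsToDelete {
		// PodKey (defined later in this file) gives the namespace/name key used as the delete key.
		keys = append(keys, PodKey(p))
	}
	// Record the expected deletions before issuing the delete calls.
	if err := uidExp.ExpectDeletions(logger, rsKey, keys); err != nil {
		return err
	}
	// ... issue the delete calls. The pod event handlers then call
	// uidExp.DeletionObserved(logger, rsKey, PodKey(pod)) for each deletion they observe,
	// including updates that only set the DeletionTimestamp.
	return nil
}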
// Reasons for pod events
const (
	// FailedCreatePodReason is added in an event and in a replica set condition
	// when a pod for a replica set fails to be created.
	FailedCreatePodReason = "FailedCreate"
	// SuccessfulCreatePodReason is added in an event when a pod for a replica set
	// is successfully created.
	SuccessfulCreatePodReason = "SuccessfulCreate"
	// FailedDeletePodReason is added in an event and in a replica set condition
	// when a pod for a replica set fails to be deleted.
	FailedDeletePodReason = "FailedDelete"
	// SuccessfulDeletePodReason is added in an event when a pod for a replica set
	// is successfully deleted.
	SuccessfulDeletePodReason = "SuccessfulDelete"
)

// RSControlInterface is an interface that knows how to add or delete
// ReplicaSets, as well as increment or decrement them. It is used
// by the deployment controller to ease testing of actions that it takes.
type RSControlInterface interface {
	PatchReplicaSet(ctx context.Context, namespace, name string, data []byte) error
}

// RealRSControl is the default implementation of RSControlInterface.
type RealRSControl struct {
	KubeClient clientset.Interface
	Recorder   record.EventRecorder
}

var _ RSControlInterface = &RealRSControl{}

func (r RealRSControl) PatchReplicaSet(ctx context.Context, namespace, name string, data []byte) error {
	_, err := r.KubeClient.AppsV1().ReplicaSets(namespace).Patch(ctx, name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
	return err
}

// TODO: merge the controller revision interface in controller_history.go with this one
// ControllerRevisionControlInterface is an interface that knows how to patch
// ControllerRevisions, as well as increment or decrement them. It is used
// by the daemonset controller to ease testing of actions that it takes.
type ControllerRevisionControlInterface interface {
	PatchControllerRevision(ctx context.Context, namespace, name string, data []byte) error
}

// RealControllerRevisionControl is the default implementation of ControllerRevisionControlInterface.
type RealControllerRevisionControl struct {
	KubeClient clientset.Interface
}

var _ ControllerRevisionControlInterface = &RealControllerRevisionControl{}

func (r RealControllerRevisionControl) PatchControllerRevision(ctx context.Context, namespace, name string, data []byte) error {
	_, err := r.KubeClient.AppsV1().ControllerRevisions(namespace).Patch(ctx, name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
	return err
}

// PodControlInterface is an interface that knows how to add or delete pods;
// it exists as an interface to allow testing.
type PodControlInterface interface {
	// CreatePods creates new pods according to the spec, and sets object as the pod's controller.
	CreatePods(ctx context.Context, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error
	// CreatePodsWithGenerateName creates new pods according to the spec, sets object as the pod's controller and sets pod's generateName.
	CreatePodsWithGenerateName(ctx context.Context, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference, generateName string) error
	// DeletePod deletes the pod identified by podID.
	DeletePod(ctx context.Context, namespace string, podID string, object runtime.Object) error
	// PatchPod patches the pod.
	PatchPod(ctx context.Context, namespace, name string, data []byte) error
}
// RealPodControl is the default implementation of PodControlInterface.
type RealPodControl struct {
	KubeClient clientset.Interface
	Recorder   record.EventRecorder
}

var _ PodControlInterface = &RealPodControl{}

func getPodsLabelSet(template *v1.PodTemplateSpec) labels.Set {
	desiredLabels := make(labels.Set)
	for k, v := range template.Labels {
		desiredLabels[k] = v
	}
	return desiredLabels
}

func getPodsFinalizers(template *v1.PodTemplateSpec) []string {
	desiredFinalizers := make([]string, len(template.Finalizers))
	copy(desiredFinalizers, template.Finalizers)
	return desiredFinalizers
}

func getPodsAnnotationSet(template *v1.PodTemplateSpec) labels.Set {
	desiredAnnotations := make(labels.Set)
	for k, v := range template.Annotations {
		desiredAnnotations[k] = v
	}
	return desiredAnnotations
}

func getPodsPrefix(controllerName string) string {
	// use the dash (if the name isn't too long) to make the pod name a bit prettier
	prefix := fmt.Sprintf("%s-", controllerName)
	if len(validation.ValidatePodName(prefix, true)) != 0 {
		prefix = controllerName
	}
	return prefix
}

func validateControllerRef(controllerRef *metav1.OwnerReference) error {
	if controllerRef == nil {
		return fmt.Errorf("controllerRef is nil")
	}
	if len(controllerRef.APIVersion) == 0 {
		return fmt.Errorf("controllerRef has empty APIVersion")
	}
	if len(controllerRef.Kind) == 0 {
		return fmt.Errorf("controllerRef has empty Kind")
	}
	if controllerRef.Controller == nil || !*controllerRef.Controller {
		return fmt.Errorf("controllerRef.Controller is not set to true")
	}
	if controllerRef.BlockOwnerDeletion == nil || !*controllerRef.BlockOwnerDeletion {
		return fmt.Errorf("controllerRef.BlockOwnerDeletion is not set")
	}
	return nil
}

func (r RealPodControl) CreatePods(ctx context.Context, namespace string, template *v1.PodTemplateSpec, controllerObject runtime.Object, controllerRef *metav1.OwnerReference) error {
	return r.CreatePodsWithGenerateName(ctx, namespace, template, controllerObject, controllerRef, "")
}

func (r RealPodControl) CreatePodsWithGenerateName(ctx context.Context, namespace string, template *v1.PodTemplateSpec, controllerObject runtime.Object, controllerRef *metav1.OwnerReference, generateName string) error {
	if err := validateControllerRef(controllerRef); err != nil {
		return err
	}
	pod, err := GetPodFromTemplate(template, controllerObject, controllerRef)
	if err != nil {
		return err
	}
	if len(generateName) > 0 {
		pod.ObjectMeta.GenerateName = generateName
	}
	return r.createPods(ctx, namespace, pod, controllerObject)
}

func (r RealPodControl) PatchPod(ctx context.Context, namespace, name string, data []byte) error {
	_, err := r.KubeClient.CoreV1().Pods(namespace).Patch(ctx, name, types.StrategicMergePatchType, data, metav1.PatchOptions{})
	return err
}

func GetPodFromTemplate(template *v1.PodTemplateSpec, parentObject runtime.Object, controllerRef *metav1.OwnerReference) (*v1.Pod, error) {
	desiredLabels := getPodsLabelSet(template)
	desiredFinalizers := getPodsFinalizers(template)
	desiredAnnotations := getPodsAnnotationSet(template)
	accessor, err := meta.Accessor(parentObject)
	if err != nil {
		return nil, fmt.Errorf("parentObject does not have ObjectMeta, %v", err)
	}
	prefix := getPodsPrefix(accessor.GetName())

	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Labels:       desiredLabels,
			Annotations:  desiredAnnotations,
			GenerateName: prefix,
			Finalizers:   desiredFinalizers,
		},
	}
	if controllerRef != nil {
		pod.OwnerReferences = append(pod.OwnerReferences, *controllerRef)
	}
	pod.Spec = *template.Spec.DeepCopy()
	return pod, nil
}

func (r RealPodControl) createPods(ctx context.Context, namespace string, pod *v1.Pod, object runtime.Object) error {
	if len(labels.Set(pod.Labels)) == 0 {
		return fmt.Errorf("unable to create pods, no labels")
	}
	newPod, err := r.KubeClient.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
	if err != nil {
		// only send an event if the namespace isn't terminating
		if !apierrors.HasStatusCause(err, v1.NamespaceTerminatingCause) {
			r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreatePodReason, "Error creating: %v", err)
		}
		return err
	}
	logger := klog.FromContext(ctx)
	accessor, err := meta.Accessor(object)
	if err != nil {
		logger.Error(err, "parentObject does not have ObjectMeta")
		return nil
	}
	logger.V(4).Info("Controller created pod", "controller", accessor.GetName(), "pod", klog.KObj(newPod))
	r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulCreatePodReason, "Created pod: %v", newPod.Name)

	return nil
}

func (r RealPodControl) DeletePod(ctx context.Context, namespace string, podID string, object runtime.Object) error {
	accessor, err := meta.Accessor(object)
	if err != nil {
		return fmt.Errorf("object does not have ObjectMeta, %v", err)
	}
	logger := klog.FromContext(ctx)
	logger.V(2).Info("Deleting pod", "controller", accessor.GetName(), "pod", klog.KRef(namespace, podID))
	if err := r.KubeClient.CoreV1().Pods(namespace).Delete(ctx, podID, metav1.DeleteOptions{}); err != nil {
		if apierrors.IsNotFound(err) {
			logger.V(4).Info("Pod has already been deleted.", "pod", klog.KRef(namespace, podID))
			return err
		}
		r.Recorder.Eventf(object, v1.EventTypeWarning, FailedDeletePodReason, "Error deleting: %v", err)
		return fmt.Errorf("unable to delete pods: %v", err)
	}
	r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulDeletePodReason, "Deleted pod: %v", podID)

	return nil
}
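// podFromTemplateSketch is an illustrative sketch, not part of the original file: building a
// controllerRef that passes validateControllerRef and turning a template into a pod.
// metav1.NewControllerRef sets both Controller and BlockOwnerDeletion to true, which the
// CreatePods path requires. The ReplicaSet name, namespace, labels and image are hypothetical.
func podFromTemplateSketch() (*v1.Pod, error) {
	rs := &apps.ReplicaSet{ObjectMeta: metav1.ObjectMeta{Name: "frontend", Namespace: "default"}}
	rs.Spec.Template = v1.PodTemplateSpec{
		ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "frontend"}},
		Spec:       v1.PodSpec{Containers: []v1.Container{{Name: "web", Image: "example.com/web:latest"}}},
	}
	controllerRef := metav1.NewControllerRef(rs, apps.SchemeGroupVersion.WithKind("ReplicaSet"))
	// The returned pod copies the template's labels, annotations and finalizers, carries the
	// owner reference, and gets GenerateName "frontend-" so the API server picks a unique name.
	return GetPodFromTemplate(&rs.Spec.Template, rs, controllerRef)
}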
type FakePodControl struct {
	sync.Mutex
	Templates       []v1.PodTemplateSpec
	ControllerRefs  []metav1.OwnerReference
	DeletePodName   []string
	Patches         [][]byte
	Err             error
	CreateLimit     int
	CreateCallCount int
}

var _ PodControlInterface = &FakePodControl{}

func (f *FakePodControl) PatchPod(ctx context.Context, namespace, name string, data []byte) error {
	f.Lock()
	defer f.Unlock()
	f.Patches = append(f.Patches, data)
	if f.Err != nil {
		return f.Err
	}
	return nil
}

func (f *FakePodControl) CreatePods(ctx context.Context, namespace string, spec *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error {
	return f.CreatePodsWithGenerateName(ctx, namespace, spec, object, controllerRef, "")
}

func (f *FakePodControl) CreatePodsWithGenerateName(ctx context.Context, namespace string, spec *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference, generateNamePrefix string) error {
	f.Lock()
	defer f.Unlock()
	f.CreateCallCount++
	if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit {
		return fmt.Errorf("not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount)
	}
	spec.GenerateName = generateNamePrefix
	f.Templates = append(f.Templates, *spec)
	f.ControllerRefs = append(f.ControllerRefs, *controllerRef)
	if f.Err != nil {
		return f.Err
	}
	return nil
}

func (f *FakePodControl) DeletePod(ctx context.Context, namespace string, podID string, object runtime.Object) error {
	f.Lock()
	defer f.Unlock()
	f.DeletePodName = append(f.DeletePodName, podID)
	if f.Err != nil {
		return f.Err
	}
	return nil
}

func (f *FakePodControl) Clear() {
	f.Lock()
	defer f.Unlock()
	f.DeletePodName = []string{}
	f.Templates = []v1.PodTemplateSpec{}
	f.ControllerRefs = []metav1.OwnerReference{}
	f.Patches = [][]byte{}
	f.CreateLimit = 0
	f.CreateCallCount = 0
}

// ByLogging allows custom sorting of pods so the best one can be picked for getting its logs.
type ByLogging []*v1.Pod

func (s ByLogging) Len() int      { return len(s) }
func (s ByLogging) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

func (s ByLogging) Less(i, j int) bool {
	// 1. assigned < unassigned
	if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) {
		return len(s[i].Spec.NodeName) > 0
	}
	// 2. PodRunning < PodUnknown < PodPending
	if s[i].Status.Phase != s[j].Status.Phase {
		return podPhaseToOrdinal[s[i].Status.Phase] > podPhaseToOrdinal[s[j].Status.Phase]
	}
	// 3. ready < not ready
	if podutil.IsPodReady(s[i]) != podutil.IsPodReady(s[j]) {
		return podutil.IsPodReady(s[i])
	}
	// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
	// see https://github.com/kubernetes/kubernetes/issues/22065
	// 4. Been ready for more time < less time < empty time
	if podutil.IsPodReady(s[i]) && podutil.IsPodReady(s[j]) {
		readyTime1 := podReadyTime(s[i])
		readyTime2 := podReadyTime(s[j])
		if !readyTime1.Equal(readyTime2) {
			return afterOrZero(readyTime2, readyTime1)
		}
	}
	// 5. Pods with containers with higher restart counts < lower restart counts
	if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) {
		return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j])
	}
	// 6. older pods < newer pods < empty timestamp pods
	if !s[i].CreationTimestamp.Equal(&s[j].CreationTimestamp) {
		return afterOrZero(&s[j].CreationTimestamp, &s[i].CreationTimestamp)
	}
	return false
}

// ActivePods type allows custom sorting of pods so a controller can pick the best ones to delete.
type ActivePods []*v1.Pod

func (s ActivePods) Len() int      { return len(s) }
func (s ActivePods) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

func (s ActivePods) Less(i, j int) bool {
	// 1. Unassigned < assigned
	// If only one of the pods is unassigned, the unassigned one is smaller
	if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) {
		return len(s[i].Spec.NodeName) == 0
	}
	// 2. PodPending < PodUnknown < PodRunning
	if podPhaseToOrdinal[s[i].Status.Phase] != podPhaseToOrdinal[s[j].Status.Phase] {
		return podPhaseToOrdinal[s[i].Status.Phase] < podPhaseToOrdinal[s[j].Status.Phase]
	}
	// 3. Not ready < ready
	// If only one of the pods is not ready, the not ready one is smaller
	if podutil.IsPodReady(s[i]) != podutil.IsPodReady(s[j]) {
		return !podutil.IsPodReady(s[i])
	}
	// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
	// see https://github.com/kubernetes/kubernetes/issues/22065
	// 4. Been ready for empty time < less time < more time
	// If both pods are ready, the latest ready one is smaller
	if podutil.IsPodReady(s[i]) && podutil.IsPodReady(s[j]) {
		readyTime1 := podReadyTime(s[i])
		readyTime2 := podReadyTime(s[j])
		if !readyTime1.Equal(readyTime2) {
			return afterOrZero(readyTime1, readyTime2)
		}
	}
	// 5. Pods with containers with higher restart counts < lower restart counts
	if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) {
		return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j])
	}
	// 6. Empty creation time pods < newer pods < older pods
	if !s[i].CreationTimestamp.Equal(&s[j].CreationTimestamp) {
		return afterOrZero(&s[i].CreationTimestamp, &s[j].CreationTimestamp)
	}
	return false
}
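// activePodsOrderSketch is an illustrative sketch, not part of the original file: under the
// ActivePods rules above, an unassigned pending pod sorts before an assigned running one, i.e.
// it is the preferred deletion candidate. The node name is hypothetical.
func activePodsOrderSketch() bool {
	pending := &v1.Pod{Status: v1.PodStatus{Phase: v1.PodPending}}
	running := &v1.Pod{
		Spec:   v1.PodSpec{NodeName: "node-1"},
		Status: v1.PodStatus{Phase: v1.PodRunning},
	}
	// Rule 1 (unassigned < assigned) already decides this comparison, so it returns true.
	return ActivePods{pending, running}.Less(0, 1)
}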
// ActivePodsWithRanks is a sortable list of pods and a list of corresponding
// ranks which will be considered during sorting. The two lists must have equal
// length. After sorting, the pods will be ordered as follows, applying each
// rule in turn until one matches:
//
//  1. If only one of the pods is assigned to a node, the pod that is not
//     assigned comes before the pod that is.
//  2. If the pods' phases differ, a pending pod comes before a pod whose phase
//     is unknown, and a pod whose phase is unknown comes before a running pod.
//  3. If exactly one of the pods is ready, the pod that is not ready comes
//     before the ready pod.
//  4. If controller.kubernetes.io/pod-deletion-cost annotation is set, then
//     the pod with the lower value will come first.
//  5. If the pods' ranks differ, the pod with greater rank comes before the pod
//     with lower rank.
//  6. If both pods are ready but have not been ready for the same amount of
//     time, the pod that has been ready for a shorter amount of time comes
//     before the pod that has been ready for longer.
//  7. If one pod has a container that has restarted more than any container in
//     the other pod, the pod with the container with more restarts comes
//     before the other pod.
//  8. If the pods' creation times differ, the pod that was created more recently
//     comes before the older pod.
//
// In 6 and 8, times are compared in a logarithmic scale. This allows a level
// of randomness among equivalent Pods when sorting. If two pods have the same
// logarithmic rank, they are sorted by UUID to provide a pseudorandom order.
//
// If none of these rules matches, the second pod comes before the first pod.
//
// The intention of this ordering is to put pods that should be preferred for
// deletion first in the list.
type ActivePodsWithRanks struct {
	// Pods is a list of pods.
	Pods []*v1.Pod

	// Rank is a ranking of pods. This ranking is used during sorting when
	// comparing two pods that are both scheduled, in the same phase, and
	// having the same ready status.
	Rank []int

	// Now is a reference timestamp for doing logarithmic timestamp comparisons.
	// If zero, comparison happens without scaling.
	Now metav1.Time
}

func (s ActivePodsWithRanks) Len() int {
	return len(s.Pods)
}

func (s ActivePodsWithRanks) Swap(i, j int) {
	s.Pods[i], s.Pods[j] = s.Pods[j], s.Pods[i]
	s.Rank[i], s.Rank[j] = s.Rank[j], s.Rank[i]
}

// Less compares two pods with corresponding ranks and returns true if the first
// one should be preferred for deletion.
func (s ActivePodsWithRanks) Less(i, j int) bool {
	// 1. Unassigned < assigned
	// If only one of the pods is unassigned, the unassigned one is smaller
	if s.Pods[i].Spec.NodeName != s.Pods[j].Spec.NodeName && (len(s.Pods[i].Spec.NodeName) == 0 || len(s.Pods[j].Spec.NodeName) == 0) {
		return len(s.Pods[i].Spec.NodeName) == 0
	}
	// 2. PodPending < PodUnknown < PodRunning
	if podPhaseToOrdinal[s.Pods[i].Status.Phase] != podPhaseToOrdinal[s.Pods[j].Status.Phase] {
		return podPhaseToOrdinal[s.Pods[i].Status.Phase] < podPhaseToOrdinal[s.Pods[j].Status.Phase]
	}
	// 3. Not ready < ready
	// If only one of the pods is not ready, the not ready one is smaller
	if podutil.IsPodReady(s.Pods[i]) != podutil.IsPodReady(s.Pods[j]) {
		return !podutil.IsPodReady(s.Pods[i])
	}

	// 4. lower pod-deletion-cost < higher pod-deletion cost
	if utilfeature.DefaultFeatureGate.Enabled(features.PodDeletionCost) {
		pi, _ := helper.GetDeletionCostFromPodAnnotations(s.Pods[i].Annotations)
		pj, _ := helper.GetDeletionCostFromPodAnnotations(s.Pods[j].Annotations)
		if pi != pj {
			return pi < pj
		}
	}

	// 5. Doubled up < not doubled up
	// If one of the two pods is on the same node as one or more additional
	// ready pods that belong to the same replicaset, whichever pod has more
	// colocated ready pods is less
	if s.Rank[i] != s.Rank[j] {
		return s.Rank[i] > s.Rank[j]
	}
	// TODO: take availability into account when we push minReadySeconds information from deployment into pods,
	// see https://github.com/kubernetes/kubernetes/issues/22065
	// 6. Been ready for empty time < less time < more time
	// If both pods are ready, the latest ready one is smaller
	if podutil.IsPodReady(s.Pods[i]) && podutil.IsPodReady(s.Pods[j]) {
		readyTime1 := podReadyTime(s.Pods[i])
		readyTime2 := podReadyTime(s.Pods[j])
		if !readyTime1.Equal(readyTime2) {
			if !utilfeature.DefaultFeatureGate.Enabled(features.LogarithmicScaleDown) {
				return afterOrZero(readyTime1, readyTime2)
			} else {
				if s.Now.IsZero() || readyTime1.IsZero() || readyTime2.IsZero() {
					return afterOrZero(readyTime1, readyTime2)
				}
				rankDiff := logarithmicRankDiff(*readyTime1, *readyTime2, s.Now)
				if rankDiff == 0 {
					return s.Pods[i].UID < s.Pods[j].UID
				}
				return rankDiff < 0
			}
		}
	}
	// 7. Pods with containers with higher restart counts < lower restart counts
	if maxContainerRestarts(s.Pods[i]) != maxContainerRestarts(s.Pods[j]) {
		return maxContainerRestarts(s.Pods[i]) > maxContainerRestarts(s.Pods[j])
	}
	// 8. Empty creation time pods < newer pods < older pods
	if !s.Pods[i].CreationTimestamp.Equal(&s.Pods[j].CreationTimestamp) {
		if !utilfeature.DefaultFeatureGate.Enabled(features.LogarithmicScaleDown) {
			return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
		} else {
			if s.Now.IsZero() || s.Pods[i].CreationTimestamp.IsZero() || s.Pods[j].CreationTimestamp.IsZero() {
				return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp)
			}
			rankDiff := logarithmicRankDiff(s.Pods[i].CreationTimestamp, s.Pods[j].CreationTimestamp, s.Now)
			if rankDiff == 0 {
				return s.Pods[i].UID < s.Pods[j].UID
			}
			return rankDiff < 0
		}
	}
	return false
}

// afterOrZero checks if time t1 is after time t2; if one of them
// is zero, the zero time is seen as after non-zero time.
func afterOrZero(t1, t2 *metav1.Time) bool {
	if t1.Time.IsZero() || t2.Time.IsZero() {
		return t1.Time.IsZero()
	}
	return t1.After(t2.Time)
}

// logarithmicRankDiff calculates the base-2 logarithmic ranks of 2 timestamps,
// compared to the current timestamp
func logarithmicRankDiff(t1, t2, now metav1.Time) int64 {
	d1 := now.Sub(t1.Time)
	d2 := now.Sub(t2.Time)
	r1 := int64(-1)
	r2 := int64(-1)
	if d1 > 0 {
		r1 = int64(math.Log2(float64(d1)))
	}
	if d2 > 0 {
		r2 = int64(math.Log2(float64(d2)))
	}
	return r1 - r2
}

func podReadyTime(pod *v1.Pod) *metav1.Time {
	if podutil.IsPodReady(pod) {
		for _, c := range pod.Status.Conditions {
			// we only care about pod ready conditions
			if c.Type == v1.PodReady && c.Status == v1.ConditionTrue {
				return &c.LastTransitionTime
			}
		}
	}
	return &metav1.Time{}
}

func maxContainerRestarts(pod *v1.Pod) int {
	maxRestarts := 0
	for _, c := range pod.Status.ContainerStatuses {
		maxRestarts = max(maxRestarts, int(c.RestartCount))
	}
	return maxRestarts
}
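// logarithmicBucketSketch is an illustrative sketch, not part of the original file: with the
// LogarithmicScaleDown gate, two pods whose ages fall into the same base-2 bucket get a rank
// difference of zero from logarithmicRankDiff and are then ordered by UID, which spreads
// deletions pseudorandomly across pods of roughly the same age. The timestamps are hypothetical.
func logarithmicBucketSketch() bool {
	now := metav1.Now()
	t1 := metav1.NewTime(now.Add(-100 * time.Minute))
	t2 := metav1.NewTime(now.Add(-80 * time.Minute))
	// Both ages land in the same log2 bucket, so the rank difference is zero.
	return logarithmicRankDiff(t1, t2, now) == 0
}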
// FilterActivePods returns pods that have not terminated.
func FilterActivePods(logger klog.Logger, pods []*v1.Pod) []*v1.Pod {
	var result []*v1.Pod
	for _, p := range pods {
		if IsPodActive(p) {
			result = append(result, p)
		} else {
			logger.V(4).Info("Ignoring inactive pod", "pod", klog.KObj(p), "phase", p.Status.Phase, "deletionTime", klog.SafePtr(p.DeletionTimestamp))
		}
	}
	return result
}

func FilterTerminatingPods(pods []*v1.Pod) []*v1.Pod {
	var result []*v1.Pod
	for _, p := range pods {
		if IsPodTerminating(p) {
			result = append(result, p)
		}
	}
	return result
}

func CountTerminatingPods(pods []*v1.Pod) int32 {
	numberOfTerminatingPods := 0
	for _, p := range pods {
		if IsPodTerminating(p) {
			numberOfTerminatingPods += 1
		}
	}
	return int32(numberOfTerminatingPods)
}

func IsPodActive(p *v1.Pod) bool {
	return v1.PodSucceeded != p.Status.Phase &&
		v1.PodFailed != p.Status.Phase &&
		p.DeletionTimestamp == nil
}

func IsPodTerminating(p *v1.Pod) bool {
	return !podutil.IsPodTerminal(p) &&
		p.DeletionTimestamp != nil
}

// FilterActiveReplicaSets returns replica sets that have (or at least ought to have) pods.
func FilterActiveReplicaSets(replicaSets []*apps.ReplicaSet) []*apps.ReplicaSet {
	activeFilter := func(rs *apps.ReplicaSet) bool {
		return rs != nil && *(rs.Spec.Replicas) > 0
	}
	return FilterReplicaSets(replicaSets, activeFilter)
}

type filterRS func(rs *apps.ReplicaSet) bool

// FilterReplicaSets returns replica sets that are filtered by filterFn (all returned ones should match filterFn).
func FilterReplicaSets(RSes []*apps.ReplicaSet, filterFn filterRS) []*apps.ReplicaSet {
	var filtered []*apps.ReplicaSet
	for i := range RSes {
		if filterFn(RSes[i]) {
			filtered = append(filtered, RSes[i])
		}
	}
	return filtered
}

// PodKey returns a key unique to the given pod within a cluster.
// It's used so we consistently use the same key scheme in this module.
// It does exactly what cache.MetaNamespaceKeyFunc would have done
// except there's no possibility of error since we know the exact type.
func PodKey(pod *v1.Pod) string {
	return fmt.Sprintf("%v/%v", pod.Namespace, pod.Name)
}

// ControllersByCreationTimestamp sorts a list of ReplicationControllers by creation timestamp, using their names as a tie breaker.
type ControllersByCreationTimestamp []*v1.ReplicationController

func (o ControllersByCreationTimestamp) Len() int      { return len(o) }
func (o ControllersByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ControllersByCreationTimestamp) Less(i, j int) bool {
	if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) {
		return o[i].Name < o[j].Name
	}
	return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
}

// ReplicaSetsByCreationTimestamp sorts a list of ReplicaSet by creation timestamp, using their names as a tie breaker.
type ReplicaSetsByCreationTimestamp []*apps.ReplicaSet

func (o ReplicaSetsByCreationTimestamp) Len() int      { return len(o) }
func (o ReplicaSetsByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ReplicaSetsByCreationTimestamp) Less(i, j int) bool {
	if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) {
		return o[i].Name < o[j].Name
	}
	return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
}

// ReplicaSetsBySizeOlder sorts a list of ReplicaSet by size in descending order, using their creation timestamp or name as a tie breaker.
// By using the creation timestamp, this sorts from old to new replica sets.
type ReplicaSetsBySizeOlder []*apps.ReplicaSet

func (o ReplicaSetsBySizeOlder) Len() int      { return len(o) }
func (o ReplicaSetsBySizeOlder) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ReplicaSetsBySizeOlder) Less(i, j int) bool {
	if *(o[i].Spec.Replicas) == *(o[j].Spec.Replicas) {
		return ReplicaSetsByCreationTimestamp(o).Less(i, j)
	}
	return *(o[i].Spec.Replicas) > *(o[j].Spec.Replicas)
}

// ReplicaSetsBySizeNewer sorts a list of ReplicaSet by size in descending order, using their creation timestamp or name as a tie breaker.
// By using the creation timestamp, this sorts from new to old replica sets.
type ReplicaSetsBySizeNewer []*apps.ReplicaSet

func (o ReplicaSetsBySizeNewer) Len() int      { return len(o) }
func (o ReplicaSetsBySizeNewer) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o ReplicaSetsBySizeNewer) Less(i, j int) bool {
	if *(o[i].Spec.Replicas) == *(o[j].Spec.Replicas) {
		return ReplicaSetsByCreationTimestamp(o).Less(j, i)
	}
	return *(o[i].Spec.Replicas) > *(o[j].Spec.Replicas)
}

// AddOrUpdateTaintOnNode adds taints to the node. If a taint was added to the node, it issues
// API calls to update the node; otherwise, it makes no API calls. Returns an error if any call fails.
func AddOrUpdateTaintOnNode(ctx context.Context, c clientset.Interface, nodeName string, taints ...*v1.Taint) error {
	if len(taints) == 0 {
		return nil
	}
	firstTry := true
	return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error {
		var err error
		var oldNode *v1.Node
		// First we try getting node from the API server cache, as it's cheaper. If it fails
		// we get it from etcd to be sure to have fresh data.
		option := metav1.GetOptions{}
		if firstTry {
			option.ResourceVersion = "0"
			firstTry = false
		}
		oldNode, err = c.CoreV1().Nodes().Get(ctx, nodeName, option)
		if err != nil {
			return err
		}

		var newNode *v1.Node
		oldNodeCopy := oldNode
		updated := false
		for _, taint := range taints {
			curNewNode, ok, err := taintutils.AddOrUpdateTaint(oldNodeCopy, taint)
			if err != nil {
				return fmt.Errorf("failed to update taint of node")
			}
			updated = updated || ok
			newNode = curNewNode
			oldNodeCopy = curNewNode
		}
		if !updated {
			return nil
		}
		return PatchNodeTaints(ctx, c, nodeName, oldNode, newNode)
	})
}
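// taintRoundTripSketch is an illustrative sketch, not part of the original file: adding and later
// removing a temporary NoSchedule taint with the taint helpers in this file. The taint key and
// the decision to skip the local node check are hypothetical.
func taintRoundTripSketch(ctx context.Context, c clientset.Interface, nodeName string) error {
	taint := &v1.Taint{Key: "example.com/maintenance", Effect: v1.TaintEffectNoSchedule}
	if err := AddOrUpdateTaintOnNode(ctx, c, nodeName, taint); err != nil {
		return err
	}
	// Passing a nil *v1.Node skips the local short-circuit check and always issues the Get.
	return RemoveTaintOffNode(ctx, c, nodeName, nil, taint)
}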
// RemoveTaintOffNode is for cleaning up taints temporarily added to a node; it
// won't fail if the target taint doesn't exist or has already been removed.
// If passed a node, it checks whether there is anything to be done; if the taint
// is not present, it issues no API calls.
func RemoveTaintOffNode(ctx context.Context, c clientset.Interface, nodeName string, node *v1.Node, taints ...*v1.Taint) error {
	if len(taints) == 0 {
		return nil
	}
	// Short circuit for limiting amount of API calls.
	if node != nil {
		match := false
		for _, taint := range taints {
			if taintutils.TaintExists(node.Spec.Taints, taint) {
				match = true
				break
			}
		}
		if !match {
			return nil
		}
	}

	firstTry := true
	return clientretry.RetryOnConflict(UpdateTaintBackoff, func() error {
		var err error
		var oldNode *v1.Node
		// First we try getting node from the API server cache, as it's cheaper. If it fails
		// we get it from etcd to be sure to have fresh data.
		option := metav1.GetOptions{}
		if firstTry {
			option.ResourceVersion = "0"
			firstTry = false
		}
		oldNode, err = c.CoreV1().Nodes().Get(ctx, nodeName, option)
		if err != nil {
			return err
		}

		var newNode *v1.Node
		oldNodeCopy := oldNode
		updated := false
		for _, taint := range taints {
			curNewNode, ok, err := taintutils.RemoveTaint(oldNodeCopy, taint)
			if err != nil {
				return fmt.Errorf("failed to remove taint of node")
			}
			updated = updated || ok
			newNode = curNewNode
			oldNodeCopy = curNewNode
		}
		if !updated {
			return nil
		}
		return PatchNodeTaints(ctx, c, nodeName, oldNode, newNode)
	})
}

// PatchNodeTaints patches node's taints.
func PatchNodeTaints(ctx context.Context, c clientset.Interface, nodeName string, oldNode *v1.Node, newNode *v1.Node) error {
	// Strip base diff node from RV to ensure that our Patch request will set RV to check for conflicts over .spec.taints.
	// This is needed because .spec.taints does not specify patchMergeKey and patchStrategy and adding them is no longer an option for compatibility reasons.
	// Using other Patch strategy works for adding new taints, however will not resolve problem with taint removal.
	oldNodeNoRV := oldNode.DeepCopy()
	oldNodeNoRV.ResourceVersion = ""
	oldDataNoRV, err := json.Marshal(&oldNodeNoRV)
	if err != nil {
		return fmt.Errorf("failed to marshal old node %#v for node %q: %v", oldNodeNoRV, nodeName, err)
	}

	newTaints := newNode.Spec.Taints
	newNodeClone := oldNode.DeepCopy()
	newNodeClone.Spec.Taints = newTaints
	newData, err := json.Marshal(newNodeClone)
	if err != nil {
		return fmt.Errorf("failed to marshal new node %#v for node %q: %v", newNodeClone, nodeName, err)
	}

	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldDataNoRV, newData, v1.Node{})
	if err != nil {
		return fmt.Errorf("failed to create patch for node %q: %v", nodeName, err)
	}

	_, err = c.CoreV1().Nodes().Patch(ctx, nodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{})
	return err
}

// ComputeHash returns a hash value calculated from pod template and
// a collisionCount to avoid hash collision. The hash will be safely encoded to
// avoid bad words.
func ComputeHash(template *v1.PodTemplateSpec, collisionCount *int32) string {
	podTemplateSpecHasher := fnv.New32a()
	hashutil.DeepHashObject(podTemplateSpecHasher, *template)

	// Add collisionCount in the hash if it exists.
	if collisionCount != nil {
		collisionCountBytes := make([]byte, 8)
		binary.LittleEndian.PutUint32(collisionCountBytes, uint32(*collisionCount))
		podTemplateSpecHasher.Write(collisionCountBytes)
	}

	return rand.SafeEncodeString(fmt.Sprint(podTemplateSpecHasher.Sum32()))
}

func AddOrUpdateLabelsOnNode(kubeClient clientset.Interface, nodeName string, labelsToUpdate map[string]string) error {
	firstTry := true
	return clientretry.RetryOnConflict(UpdateLabelBackoff, func() error {
		var err error
		var node *v1.Node
		// First we try getting node from the API server cache, as it's cheaper. If it fails
		// we get it from etcd to be sure to have fresh data.
		option := metav1.GetOptions{}
		if firstTry {
			option.ResourceVersion = "0"
			firstTry = false
		}
		node, err = kubeClient.CoreV1().Nodes().Get(context.TODO(), nodeName, option)
		if err != nil {
			return err
		}

		// Make a copy of the node and update the labels.
		newNode := node.DeepCopy()
		if newNode.Labels == nil {
			newNode.Labels = make(map[string]string)
		}
		for key, value := range labelsToUpdate {
			newNode.Labels[key] = value
		}

		oldData, err := json.Marshal(node)
		if err != nil {
			return fmt.Errorf("failed to marshal the existing node %#v: %v", node, err)
		}
		newData, err := json.Marshal(newNode)
		if err != nil {
			return fmt.Errorf("failed to marshal the new node %#v: %v", newNode, err)
		}
		patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Node{})
		if err != nil {
			return fmt.Errorf("failed to create a two-way merge patch: %v", err)
		}
		if _, err := kubeClient.CoreV1().Nodes().Patch(context.TODO(), node.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}); err != nil {
			return fmt.Errorf("failed to patch the node: %v", err)
		}
		return nil
	})
}
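// computeHashSketch is an illustrative sketch, not part of the original file: ComputeHash is
// deterministic for a given template, and bumping the collision count produces a different,
// equally valid suffix; callers (such as the deployment controller's pod-template-hash label)
// rely on both properties. The template is supplied by the caller and is hypothetical here.
func computeHashSketch(template *v1.PodTemplateSpec) (string, string) {
	withoutCollisions := ComputeHash(template, nil)
	collisionCount := int32(1)
	afterCollision := ComputeHash(template, &collisionCount)
	return withoutCollisions, afterCollision
}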