github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/tide/tide.go

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package tide
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"reflect"
    23  	"sort"
    24  	"time"
    25  
    26  	corev1 "k8s.io/api/core/v1"
    27  	"k8s.io/apimachinery/pkg/api/errors"
    28  	"k8s.io/apimachinery/pkg/api/resource"
    29  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    30  	"k8s.io/apimachinery/pkg/labels"
    31  	"k8s.io/apimachinery/pkg/types"
    32  	"k8s.io/apimachinery/pkg/util/intstr"
    33  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    34  	"k8s.io/apimachinery/pkg/util/wait"
    35  	"k8s.io/autoscaler/cluster-autoscaler/simulator"
    36  	corelisters "k8s.io/client-go/listers/core/v1"
    37  	"k8s.io/client-go/tools/cache"
    38  	"k8s.io/client-go/util/workqueue"
    39  	"k8s.io/klog/v2"
    40  	podv1 "k8s.io/kubernetes/pkg/api/v1/pod"
    41  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    42  	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
    43  
    44  	apis "github.com/kubewharf/katalyst-api/pkg/apis/tide/v1alpha1"
    45  	listers "github.com/kubewharf/katalyst-api/pkg/client/listers/tide/v1alpha1"
    46  	katalystbase "github.com/kubewharf/katalyst-core/cmd/base"
    47  	"github.com/kubewharf/katalyst-core/pkg/client"
    48  	"github.com/kubewharf/katalyst-core/pkg/config/controller"
    49  	"github.com/kubewharf/katalyst-core/pkg/config/generic"
    50  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    51  )
    52  
    53  const (
    54  	tideControllerName   = "tide"
    55  	tideCycleWorkerCount = 1
    56  )
    57  
    58  const (
    59  	tidePeriod = 10 * time.Second
    60  )
    61  
    62  type OnlinePodChecker func(pod *corev1.Pod) bool
    63  
    64  type NodeInfo struct {
    65  	NodeUsage
    66  }
    67  
    68  // NodeUsage stores a node, the pods running on it, and its resource usage
    69  type NodeUsage struct {
    70  	node    *corev1.Node
    71  	usage   map[corev1.ResourceName]*resource.Quantity
    72  	allPods []*corev1.Pod
    73  }
    74  
    75  type Tide struct {
    76  	ctx context.Context
    77  
    78  	client *client.GenericClientSet
    79  
    80  	checker simulator.PredicateChecker
    81  
    82  	nodeListerSynced cache.InformerSynced
    83  	nodeLister       corelisters.NodeLister
    84  	podListerSynced  cache.InformerSynced
    85  	podLister        corelisters.PodLister
    86  	tideListerSynced cache.InformerSynced
    87  	tideLister       listers.TideNodePoolLister
    88  
    89  	// syncQueue holds the keys of TideNodePools waiting to be synced
    90  	syncQueue workqueue.RateLimitingInterface
    91  
    92  	// metricsEmitter is used to emit metrics
    93  	metricsEmitter metrics.MetricEmitter
    94  }
    95  
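        // NewTide creates the tide controller: it registers node and TideNodePool event
        // handlers, sets up the node, pod and TideNodePool listers, builds a
        // scheduler-based predicate checker used to simulate pod placement, and
        // prepares the rate-limited work queue.
        //
        // A minimal wiring sketch, assuming a prepared *katalystbase.GenericContext
        // named controlCtx (the two generic configurations are currently unused and may be nil):
        //
        //	tide, err := NewTide(ctx, controlCtx, nil, nil)
        //	if err != nil {
        //		return err
        //	}
        //	go tide.Run()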
    96  func NewTide(ctx context.Context,
    97  	controlCtx *katalystbase.GenericContext,
    98  	_ *generic.GenericConfiguration,
    99  	_ *controller.GenericControllerConfiguration,
   100  ) (*Tide, error) {
   101  	tide := &Tide{
   102  		ctx:    ctx,
   103  		client: controlCtx.Client,
   104  		syncQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(),
   105  			tideControllerName),
   106  	}
   107  	checker, err := simulator.NewSchedulerBasedPredicateChecker(controlCtx.Client.KubeClient, ctx.Done())
   108  	if err != nil {
   109  		return nil, err
   110  	}
   111  	tide.checker = checker
   112  
   113  	controlCtx.KubeInformerFactory.Core().V1().Nodes().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   114  		AddFunc:    tide.addNodeEventHandle,
   115  		UpdateFunc: tide.updateNodeEventHandle,
   116  	})
   117  	tide.nodeListerSynced = controlCtx.KubeInformerFactory.Core().V1().Nodes().Informer().HasSynced
   118  	tide.nodeLister = controlCtx.KubeInformerFactory.Core().V1().Nodes().Lister()
   119  
   120  	tide.podListerSynced = controlCtx.KubeInformerFactory.Core().V1().Pods().Informer().HasSynced
   121  	tide.podLister = controlCtx.KubeInformerFactory.Core().V1().Pods().Lister()
   122  
   124  	controlCtx.InternalInformerFactory.Tide().V1alpha1().TideNodePools().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   125  		AddFunc:    tide.addTideNodePoolEventHandle,
   126  		UpdateFunc: tide.updateTideNodePoolEventHandle,
   127  		DeleteFunc: tide.deleteTideNodePoolEventHandle,
   128  	})
   129  	tide.tideLister = controlCtx.InternalInformerFactory.Tide().V1alpha1().TideNodePools().Lister()
   130  	tide.tideListerSynced = controlCtx.InternalInformerFactory.Tide().V1alpha1().TideNodePools().Informer().HasSynced
   131  
   132  	tide.metricsEmitter = controlCtx.EmitterPool.GetDefaultMetricsEmitter()
   133  
   134  	return tide, nil
   135  }
   136  
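        // Run waits for the node, pod and TideNodePool caches to sync, then starts the
        // periodic resync loop and the sync workers, blocking until the context is done.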
   137  func (t *Tide) Run() {
   138  	defer utilruntime.HandleCrash()
   139  	defer t.syncQueue.ShutDown()
   140  
   141  	defer klog.Infof("Shutting down %s controller", tideControllerName)
   142  
   143  	if !cache.WaitForCacheSync(t.ctx.Done(), t.nodeListerSynced, t.tideListerSynced, t.podListerSynced) {
   144  		utilruntime.HandleError(fmt.Errorf("unable to sync caches for %s controller", tideControllerName))
   145  		return
   146  	}
   147  	klog.Infof("Caches are synced for %s controller", tideControllerName)
   148  	klog.Infof("start %d workers for %s controller", tideCycleWorkerCount, tideControllerName)
   149  
   150  	go wait.Until(t.periodSync, tidePeriod, t.ctx.Done())
   151  	for i := 0; i < tideCycleWorkerCount; i++ {
   152  		go wait.Until(t.worker, time.Second, t.ctx.Done())
   153  	}
   154  
   155  	<-t.ctx.Done()
   156  }
   157  
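        // addNodeEventHandle enqueues every TideNodePool whose node selector matches the
        // labels of the added node.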
   158  func (t *Tide) addNodeEventHandle(obj interface{}) {
   159  	tideNodePoolList, err := t.tideLister.List(labels.Everything())
   160  	if err != nil {
   161  		klog.Errorf("list tide hybrid node pool failed: %v", err)
   162  		return
   163  	}
   164  	n, ok := obj.(*corev1.Node)
   165  	if !ok {
   166  		klog.Errorf("cannot convert obj to *corev1.Node: %v", obj)
   167  		return
   168  	}
   169  	for _, tideNodePool := range tideNodePoolList {
   170  		if labels.SelectorFromSet(tideNodePool.Spec.NodeConfigs.NodeSelector).
   171  			Matches(labels.Set(n.GetLabels())) {
   172  			klog.Infof("start to sync node pool, name: %s", tideNodePool.Name)
   173  			t.enqueueWorkItem(tideNodePool)
   174  		}
   175  	}
   176  }
   177  
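        // updateNodeEventHandle enqueues every TideNodePool whose node selector matches
        // the labels of the updated node.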
   178  func (t *Tide) updateNodeEventHandle(old, cur interface{}) {
   179  	tideNodePoolList, err := t.tideLister.List(labels.Everything())
   180  	if err != nil {
   181  		klog.Errorf("list tide hybrid node pool failed: %v", err)
   182  		return
   183  	}
   184  	n, ok := cur.(*corev1.Node)
   185  	if !ok {
   186  		klog.Errorf("cannot convert obj to *corev1.Node: %v", cur)
   187  		return
   188  	}
   189  	for _, tideNodePool := range tideNodePoolList {
   190  		if labels.SelectorFromSet(tideNodePool.Spec.NodeConfigs.NodeSelector).
   191  			Matches(labels.Set(n.GetLabels())) {
   192  			klog.Infof("start to sync node pool, name: %s", tideNodePool.Name)
   193  			t.enqueueWorkItem(tideNodePool)
   194  		}
   195  	}
   196  }
   197  
   198  func (t *Tide) addTideNodePoolEventHandle(obj interface{}) {
   199  	c, ok := obj.(*apis.TideNodePool)
   200  	if !ok {
   201  		klog.Errorf("cannot convert obj to *apis.TideNodePool: %v", obj)
   202  		return
   203  	}
   204  	klog.V(4).Infof("notice addition of tide node pool %s", c.Name)
   205  
   206  	t.enqueueWorkItem(obj)
   207  }
   208  
   209  func (t *Tide) updateTideNodePoolEventHandle(_, new interface{}) {
   210  	c, ok := new.(*apis.TideNodePool)
   211  	if !ok {
   212  		klog.Errorf("cannot convert newObj to *apis.TideNodePool: %v", new)
   213  		return
   214  	}
   215  	klog.V(4).Infof("notice update of tide node pool %s", c.Name)
   216  
   217  	t.enqueueWorkItem(new)
   218  }
   219  
   220  func (t *Tide) deleteTideNodePoolEventHandle(obj interface{}) {
   221  	c, ok := obj.(*apis.TideNodePool)
   222  	if !ok {
   223  		klog.Errorf("cannot convert obj to *apis.TideNodePool: %v", obj)
   224  		return
   225  	}
   226  	klog.V(4).Infof("notice deletion of tide node pool %s", c.Name)
   227  
   228  	t.enqueueWorkItem(obj)
   229  }
   230  
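        // worker keeps processing items from the sync queue until the queue shuts down.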
   231  func (t *Tide) worker() {
   232  	for t.processNextWorkItem(context.Background()) {
   233  	}
   234  }
   235  
   236  // processNextWorkItem dequeues items, processes them, and marks them done.
   237  // It enforces that the sync is never invoked concurrently with the same key.
   238  func (t *Tide) processNextWorkItem(ctx context.Context) bool {
   239  	key, quit := t.syncQueue.Get()
   240  	if quit {
   241  		return false
   242  	}
   243  	defer t.syncQueue.Done(key)
   244  
   245  	err := t.sync(ctx, key.(string))
   246  	if err == nil {
   247  		t.syncQueue.Forget(key)
   248  		return true
   249  	}
   250  
   251  	utilruntime.HandleError(fmt.Errorf("sync %q failed with %v", key, err))
   252  	t.syncQueue.AddRateLimited(key)
   253  
   254  	return true
   255  }
   256  
   257  // enqueueWorkItem enqueues the key of the given object in the work queue.
   258  func (t *Tide) enqueueWorkItem(obj interface{}) {
   259  	key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
   260  	if err != nil {
   261  		utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err))
   262  		return
   263  	}
   264  	t.syncQueue.Add(key)
   265  }
   266  
   267  // sync syncs the TideNodePool identified by the given key.
   268  func (t *Tide) sync(ctx context.Context, key string) error {
   269  	// TODO
   270  	_, name, err := cache.SplitMetaNamespaceKey(key)
   271  	if err != nil {
   272  		return err
   273  	}
   274  	tideNodePool, err := t.tideLister.Get(name)
   275  	if errors.IsNotFound(err) {
   276  		klog.Infof("tide node pool has been deleted %v", key)
   277  		return nil
   278  	}
   279  	if err != nil {
   280  		return err
   281  	}
   282  
   283  	err = t.Reconcile(ctx, tideNodePool.DeepCopy())
   284  	if err != nil {
   285  		return err
   286  	}
   287  
   288  	return nil
   289  }
   290  
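        // reconcileDelete handles a TideNodePool that is being deleted: it strips the
        // pool-managed taints and labels from all nodes belonging to the pool and then
        // removes the finalizer.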
   291  func (t *Tide) reconcileDelete(ctx context.Context, tideNodePool *apis.TideNodePool) error {
   292  	nodes, err := t.nodeLister.List(labels.SelectorFromSet(map[string]string{LabelNodePoolKey: tideNodePool.Name}))
   293  	if err != nil {
   294  		klog.Errorf("fail to list nodes: %v", err)
   295  		return err
   296  	}
   297  	for _, node := range nodes {
   298  		if err := t.cleanNode(ctx, node.DeepCopy(), tideNodePool); err != nil {
   299  			return err
   300  		}
   301  	}
   302  	// remove the finalizer once all nodes have been cleaned up
   303  	controllerutil.RemoveFinalizer(tideNodePool, NodePoolFinalizer)
   304  	_, err = t.client.InternalClient.TideV1alpha1().TideNodePools().Update(ctx, tideNodePool, metav1.UpdateOptions{})
   305  	return err
   306  }
   307  
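        // cleanNode removes the evict taints and the pool-related labels previously set
        // on the given node by this controller.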
   308  func (t *Tide) cleanNode(ctx context.Context, node *corev1.Node, pool *apis.TideNodePool) error {
   309  	nodePoolWrapper := NewNodePoolWrapper(pool)
   310  	var foundIndexes []int
   311  	for i := range node.Spec.Taints {
   312  		if node.Spec.Taints[i].Key == nodePoolWrapper.GetEvictOnlinePodTaint().Key ||
   313  			node.Spec.Taints[i].Key == nodePoolWrapper.GetEvictOfflinePodTaint().Key {
   314  			foundIndexes = append(foundIndexes, i)
   315  		}
   316  	}
   317  	if len(foundIndexes) > 0 {
   318  		for i := len(foundIndexes) - 1; i >= 0; i-- {
   319  			s := foundIndexes[i]
   320  			node.Spec.Taints = append(node.Spec.Taints[:s], node.Spec.Taints[s+1:]...)
   321  		}
   322  	}
   323  
   324  	delete(node.Labels, nodePoolWrapper.GetOnlineLabel().Key)
   325  	delete(node.Labels, nodePoolWrapper.GetOfflineLabel().Key)
   326  	delete(node.Labels, nodePoolWrapper.GetTideLabel().Key)
   327  	delete(node.Labels, LabelReserveNode)
   328  	delete(node.Labels, LabelNodeTypeKey)
   329  	delete(node.Labels, LabelNodePoolKey)
   330  	_, err := t.client.KubeClient.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{})
   331  	return err
   332  }
   333  
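        // Reconcile drives a single TideNodePool toward its desired state: it manages the
        // finalizer, divides matching nodes into online-reserve, offline-reserve and tide
        // nodes according to the configured reserve counts, persists the result in the
        // pool status, and finally runs one balancing pass via RunOnce.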
   334  func (t *Tide) Reconcile(ctx context.Context, tideNodePool *apis.TideNodePool) error {
   335  	logger := klog.FromContext(ctx).WithValues("tideNodePool", tideNodePool.GetName())
   336  	logger.V(2).Info("start Reconcile")
   337  	defer logger.V(2).Info("end Reconcile")
   338  	// Add finalizer first
   339  	if !controllerutil.ContainsFinalizer(tideNodePool, NodePoolFinalizer) && tideNodePool.DeletionTimestamp.IsZero() {
   340  		controllerutil.AddFinalizer(tideNodePool, NodePoolFinalizer)
   341  		updated, err := t.client.InternalClient.TideV1alpha1().TideNodePools().Update(ctx, tideNodePool, metav1.UpdateOptions{})
   342  		if err != nil {
   343  			klog.ErrorS(err, "fail to add finalizer", "tideNodePool", tideNodePool.Name)
   344  			return err
   345  		}
        		tideNodePool = updated
   346  	}
   347  	// process deletion
   348  	if !tideNodePool.DeletionTimestamp.IsZero() {
   349  		return t.reconcileDelete(ctx, tideNodePool)
   350  	}
   351  	nodes, err := t.nodeLister.List(labels.SelectorFromSet(tideNodePool.Spec.NodeConfigs.NodeSelector))
   352  	if err != nil {
   353  		klog.Errorf("fail to list nodes: %v", err)
   354  		return err
   355  	}
   356  	onlineNodesExpectCount, err := intstr.GetScaledValueFromIntOrPercent(tideNodePool.Spec.NodeConfigs.Reserve.Online, len(nodes), true)
   357  	if err != nil {
   358  		klog.Errorf("fail to get online nodes number: %v", err)
   359  		return err
   360  	}
   361  	offlineNodesExpectCount, err := intstr.GetScaledValueFromIntOrPercent(tideNodePool.Spec.NodeConfigs.Reserve.Offline, len(nodes), false)
   362  	if err != nil {
   363  		klog.Errorf("fail to get offline nodes number: %v", err)
   364  		return err
   365  	}
   366  	nodePoolWrapper := NewNodePoolWrapper(tideNodePool)
   367  	reserveOnlineNodes, reserveOfflineNodes, tideNodes, unknownNodes := classifyNodes(nodes, nodePoolWrapper)
   368  	onlineNodeCount, offlineNodeCount := len(reserveOnlineNodes), len(reserveOfflineNodes)
   369  	for i := 0; i < len(reserveOnlineNodes) && onlineNodeCount > onlineNodesExpectCount; i++ {
   370  		nodePoolWrapper.SetNodeToTide(reserveOnlineNodes[i])
   371  		if _, err := t.client.KubeClient.CoreV1().Nodes().Update(ctx, reserveOnlineNodes[i], metav1.UpdateOptions{}); err != nil {
   372  			klog.Errorf("fail to convert online reserve nodes to tide: %v", err)
   373  			return err
   374  		}
   375  		onlineNodeCount--
   376  	}
   377  
   378  	for i := 0; i < len(reserveOfflineNodes) && offlineNodeCount > offlineNodesExpectCount; i++ {
   379  		nodePoolWrapper.SetNodeToTide(reserveOfflineNodes[i])
   380  		if _, err := t.client.KubeClient.CoreV1().Nodes().Update(ctx, reserveOfflineNodes[i], metav1.UpdateOptions{}); err != nil {
   381  			klog.Errorf("fail to convert offline reserve nodes to tide: %v", err)
   382  			return err
   383  		}
   384  		offlineNodeCount--
   385  	}
   386  
   387  	for i := range unknownNodes {
   388  		if onlineNodeCount < onlineNodesExpectCount {
   389  			nodePoolWrapper.SetNodeToOnlineReserve(unknownNodes[i])
   390  			if _, err := t.client.KubeClient.CoreV1().Nodes().Update(ctx, unknownNodes[i], metav1.UpdateOptions{}); err != nil {
   391  				klog.Errorf("fail to convert new nodes to reserve: %v", err)
   392  				return err
   393  			}
   394  			reserveOnlineNodes = append(reserveOnlineNodes, unknownNodes[i])
   395  			onlineNodeCount++
   396  		} else if offlineNodeCount < offlineNodesExpectCount {
   397  			nodePoolWrapper.SetNodeToOfflineReserve(unknownNodes[i])
   398  			if _, err := t.client.KubeClient.CoreV1().Nodes().Update(ctx, unknownNodes[i], metav1.UpdateOptions{}); err != nil {
   399  				klog.Errorf("fail to convert new nodes to reserve: %v", err)
   400  				return err
   401  			}
   402  			reserveOfflineNodes = append(reserveOfflineNodes, unknownNodes[i])
   403  			offlineNodeCount++
   404  		} else {
   405  			nodePoolWrapper.SetNodeToTideOnline(unknownNodes[i])
   406  			if _, err := t.client.KubeClient.CoreV1().Nodes().Update(ctx, unknownNodes[i], metav1.UpdateOptions{}); err != nil {
   407  				klog.Errorf("fail to convert offline reserve nodes to tide: %v", err)
   408  				return err
   409  			}
   410  			tideNodes = append(tideNodes, unknownNodes[i])
   411  		}
   412  	}
   413  
   414  	if err := t.UpdateStatusByNodes(ctx, tideNodePool, reserveOnlineNodes, reserveOfflineNodes, tideNodes); err != nil {
   415  		return err
   416  	}
   417  	onlineLabelSet := labels.SelectorFromSet(map[string]string{LabelPodTypeKey: LabelOnlinePodValue})
   418  	onlinePodChecker := func(pod *corev1.Pod) bool {
   419  		return onlineLabelSet.Matches(labels.Set(pod.GetLabels()))
   420  	}
   421  
   422  	if err := t.RunOnce(ctx,
   423  		onlinePodChecker,
   424  		nodePoolWrapper); err != nil {
   425  		klog.Errorf("try to balance node failed: %v", err)
   426  		return err
   427  	}
   428  	return nil
   429  }
   430  
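        // UpdateStatusByNodes writes the sorted names of the reserve and tide nodes into
        // the TideNodePool status, skipping the API call when nothing has changed.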
   431  func (t *Tide) UpdateStatusByNodes(ctx context.Context, tideNodePool *apis.TideNodePool, reserveOnlineNodes, reserveOfflineNodes, tideNodes []*corev1.Node) error {
   432  	newTideNodePool := tideNodePool.DeepCopy()
   433  
   434  	var onlineNodeNames, offlineNodeNames, tideNodeNames []string
   435  	for i := range reserveOnlineNodes {
   436  		onlineNodeNames = append(onlineNodeNames, reserveOnlineNodes[i].Name)
   437  	}
   438  	for i := range reserveOfflineNodes {
   439  		offlineNodeNames = append(offlineNodeNames, reserveOfflineNodes[i].Name)
   440  	}
   441  
   442  	for i := range tideNodes {
   443  		tideNodeNames = append(tideNodeNames, tideNodes[i].Name)
   444  	}
   445  
   446  	sortNodeName(onlineNodeNames)
   447  	sortNodeName(offlineNodeNames)
   448  	sortNodeName(tideNodeNames)
   449  	newTideNodePool.Status.ReserveNodes.OnlineNodes = onlineNodeNames
   450  	newTideNodePool.Status.ReserveNodes.OfflineNodes = offlineNodeNames
   451  	newTideNodePool.Status.TideNodes.Nodes = tideNodeNames
   452  	if reflect.DeepEqual(newTideNodePool.Status, tideNodePool.Status) {
   453  		return nil
   454  	}
   455  	_, err := t.client.InternalClient.TideV1alpha1().TideNodePools().UpdateStatus(ctx, newTideNodePool, metav1.UpdateOptions{})
   456  	return err
   457  }
   458  
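        // sortNodeName sorts node names in descending lexical order so the status lists
        // are stable across reconciliations.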
   459  func sortNodeName(data []string) {
   460  	sort.SliceStable(data, func(i, j int) bool {
   461  		return data[i] > data[j]
   462  	})
   463  }
   464  
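        // classifyNodes splits the candidate nodes into online-reserve, offline-reserve,
        // tide and unknown (not yet labeled for this pool) groups based on the pool's
        // selectors; all returned nodes are deep copies.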
   465  func classifyNodes(nodes []*corev1.Node, tideNodePool NodePoolWrapper) (
   466  	reserveOnlineNodes []*corev1.Node,
   467  	reserveOfflineNodes []*corev1.Node,
   468  	tideNodes []*corev1.Node,
   469  	unknownNodes []*corev1.Node,
   470  ) {
   471  	for i, node := range nodes {
   472  		nodeLabels := labels.Set(node.GetLabels())
   473  		switch {
   474  		case tideNodePool.GetOnlineReserveNodeSelector().Matches(nodeLabels):
   475  			reserveOnlineNodes = append(reserveOnlineNodes, nodes[i].DeepCopy())
   476  		case tideNodePool.GetOfflineReserveNodeSelector().Matches(nodeLabels):
   477  			reserveOfflineNodes = append(reserveOfflineNodes, nodes[i].DeepCopy())
   478  		case tideNodePool.GetTideNodeSelector().Matches(nodeLabels):
   479  			tideNodes = append(tideNodes, nodes[i].DeepCopy())
   480  		case !tideNodePool.GetNodePoolSelector().Matches(nodeLabels):
   481  			unknownNodes = append(unknownNodes, nodes[i].DeepCopy())
   482  		default:
   483  			// do nothing
   484  		}
   485  	}
   486  	return
   487  }
   488  
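        // GetNodePoolInfo builds a cluster snapshot from the given nodes and the pods
        // scheduled onto them, and also returns the pending pods that pass the online
        // pod checker.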
   489  func (t *Tide) GetNodePoolInfo(nodes []*corev1.Node, onlinePodChecker OnlinePodChecker) (simulator.ClusterSnapshot, []*corev1.Pod, error) {
   490  	clusterSnapshot := simulator.NewBasicClusterSnapshot()
   491  	pods, err := t.podLister.List(labels.Everything())
   492  	if err != nil {
   493  		return nil, nil, err
   494  	}
   495  	var pendingPods []*corev1.Pod
   496  	knownNodes := map[string]bool{}
   497  	for i := range nodes {
   498  		if err := clusterSnapshot.AddNode(nodes[i].DeepCopy()); err != nil {
   499  			return nil, nil, err
   500  		}
   501  		knownNodes[nodes[i].Name] = true
   502  	}
   503  
   504  	for i, pod := range pods {
   505  		if knownNodes[pod.Spec.NodeName] {
   506  			if err := clusterSnapshot.AddPod(pods[i].DeepCopy(), pod.Spec.NodeName); err != nil {
   507  				return nil, nil, err
   508  			}
   509  		} else if checkPendingOnlinePod(pods[i], onlinePodChecker) {
   510  			pendingPods = append(pendingPods, pods[i])
   511  		}
   512  	}
   513  	return clusterSnapshot, pendingPods, nil
   514  }
   515  
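        // checkPendingOnlinePod reports whether the pod is an online pod that is still
        // unscheduled: it has no node assigned, is not finished, and its PodScheduled
        // condition is false with reason Unschedulable.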
   516  func checkPendingOnlinePod(pod *corev1.Pod, onlinePodChecker OnlinePodChecker) bool {
   517  	if !(pod.Spec.NodeName == "" && pod.Status.Phase != corev1.PodSucceeded && pod.Status.Phase != corev1.PodFailed) {
   518  		return false
   519  	}
   520  	_, condition := podv1.GetPodCondition(&pod.Status, corev1.PodScheduled)
   521  	if condition == nil {
   522  		return false
   523  	}
   524  	return condition.Status == corev1.ConditionFalse && condition.Reason == corev1.PodReasonUnschedulable && onlinePodChecker(pod)
   525  }
   526  
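        // RunOnce performs one balancing pass. If online pods are pending, it tries to
        // turn offline tide nodes back online (validated against the snapshot) so the
        // pods can be scheduled; otherwise it picks the least-used online tide node,
        // checks that its online pods fit elsewhere, and converts it to offline.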
   527  func (t *Tide) RunOnce(ctx context.Context, onlinePodChecker OnlinePodChecker, tideNodePool NodePoolWrapper) error {
   528  	logger := klog.FromContext(ctx).WithValues("tideNodePool", tideNodePool.GetName())
   529  	nodeList, err := t.nodeLister.List(labels.Everything())
   530  	if err != nil {
   531  		return err
   532  	}
   533  
   534  	clusterSnapshot, pendingPods, err := t.GetNodePoolInfo(nodeList, onlinePodChecker)
   535  	if err != nil {
   536  		return err
   537  	}
   538  	// if any online pods are pending, resolving them takes priority:
   539  	// try to release offline tide nodes back to online so the pending pods can be scheduled
   540  	if len(pendingPods) != 0 {
   541  		offlineNodesInfos, err := getNodeUsageWithSelector(clusterSnapshot, []corev1.ResourceName{"cpu", "memory"}, tideNodePool.GetOfflineTideNodeSelector())
   542  		if err != nil {
   543  			return err
   544  		}
   545  		if len(offlineNodesInfos) <= 0 {
   546  			logger.Info("no offline node in tide node pool")
   547  			return nil
   548  		}
   549  		for _, pod := range pendingPods {
   550  			_, err := t.checker.FitsAnyNode(clusterSnapshot, pod)
   551  			if err == nil {
   552  				logger.Info("pod can fit node", "pod", types.NamespacedName{
   553  					Namespace: pod.Namespace,
   554  					Name:      pod.Name,
   555  				})
   556  				continue
   557  			}
   558  			for j := range offlineNodesInfos {
   559  				offlineNodesInfo := offlineNodesInfos[j]
   560  				nodeInfo, err := clusterSnapshot.NodeInfos().Get(offlineNodesInfo.node.Name)
   561  				if err != nil {
   562  					return err
   563  				}
   564  				clusterSnapshot.RemoveNode(offlineNodesInfo.node.Name)
   565  				node := t.changeNodeToOnline(nodeInfo.Node(), tideNodePool)
   566  				clusterSnapshot.AddNode(node)
   567  				_, err = t.checker.FitsAnyNode(clusterSnapshot, pod)
   568  				if err != nil {
   569  					logger.Info("pod still does not fit after releasing the offline node, skip", "pod", types.NamespacedName{
   570  						Namespace: pod.Namespace,
   571  						Name:      pod.Name,
   572  					})
   573  					// rollback node to offline
   574  					clusterSnapshot.RemoveNode(offlineNodesInfo.node.Name)
   575  					node := t.changeNodeToOffline(nodeInfo.Node(), tideNodePool)
   576  					clusterSnapshot.AddNode(node)
   577  					continue
   578  				}
   579  				if _, err := t.client.KubeClient.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{}); err != nil {
   580  					return fmt.Errorf("update node offline to online failed: %v", err)
   581  				} else {
   582  					logger.Info("release offline node to online node", "pod", types.NamespacedName{
   583  						Namespace: pod.Namespace,
   584  						Name:      pod.Name,
   585  					}, "node", node.Name)
   586  					return nil
   587  				}
   588  			}
   589  		}
   590  		logger.Info("no need to release offline node")
   591  	}
   592  	// 1. select the online node with the lowest usage
   593  	// 2. pre-schedule all online pods (request from largest to smallest) and check whether they can all be scheduled normally
   594  	// 3. start triggering scheduling by tainting
   595  	onlineNodesInfos, err := getNodeUsageWithSelector(clusterSnapshot, []corev1.ResourceName{"cpu", "memory"}, tideNodePool.GetOnlineTideNodeSelector())
   596  	if err != nil {
   597  		return err
   598  	}
   599  	// skip unless there is more than one online tide node, so at least one stays online
   600  	if len(onlineNodesInfos) <= 1 {
   601  		logger.Info("not enough online nodes in tide node pool")
   602  		return nil
   603  	}
   604  	onlineNodesInfo := onlineNodesInfos[0]
   605  	podsInNode := onlineNodesInfo.allPods
   606  	nodeInfo, err := clusterSnapshot.NodeInfos().Get(onlineNodesInfo.node.Name)
   607  	if err != nil {
   608  		return err
   609  	}
   610  	clusterSnapshot.RemoveNode(onlineNodesInfo.node.Name)
   611  	for _, pod := range podsInNode {
   612  		if onlinePodChecker(pod) {
   613  			pod.Spec.NodeName = ""
   614  			nodeName, err := t.checker.FitsAnyNode(clusterSnapshot, pod)
   615  			if err != nil {
   616  				logger.Info("cannot release online node to offline", "node", onlineNodesInfo.node.Name)
   617  				return nil
   618  			}
   619  			pod.Spec.NodeName = nodeName
   620  			clusterSnapshot.AddPod(pod, nodeName)
   621  		}
   622  	}
   623  	node := t.changeNodeToOffline(nodeInfo.Node(), tideNodePool)
   624  	if _, err := t.client.KubeClient.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{}); err != nil {
   625  		return err
   626  	}
   627  	logger.Info("release online node success", "node", node.Name)
   628  	return nil
   629  }
   630  
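        // changeNodeToOnline replaces the node's evict taint with the evict-offline-pod
        // taint and swaps the offline label for the online label.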
   631  func (t *Tide) changeNodeToOnline(node *corev1.Node, pool NodePoolWrapper) *corev1.Node {
   632  	found := false
   633  	for i := range node.Spec.Taints {
   634  		if node.Spec.Taints[i].Key == pool.GetEvictOnlinePodTaint().Key || node.Spec.Taints[i].Key == pool.GetEvictOfflinePodTaint().Key {
   635  			node.Spec.Taints[i].Key = pool.GetEvictOfflinePodTaint().Key
   636  			node.Spec.Taints[i].Value = pool.GetEvictOfflinePodTaint().Value
   637  			node.Spec.Taints[i].Effect = corev1.TaintEffect(pool.GetEvictOfflinePodTaint().Effect)
   638  			found = true
   639  		}
   640  	}
   641  	if !found {
   642  		node.Spec.Taints = append(node.Spec.Taints, corev1.Taint{
   643  			Key:    pool.GetEvictOfflinePodTaint().Key,
   644  			Value:  pool.GetEvictOfflinePodTaint().Value,
   645  			Effect: corev1.TaintEffect(pool.GetEvictOfflinePodTaint().Effect),
   646  		})
   647  	}
   648  	delete(node.Labels, pool.GetOfflineLabel().Key)
   649  	node.Labels[pool.GetOnlineLabel().Key] = pool.GetOnlineLabel().Value
   650  	return node
   651  }
   652  
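        // changeNodeToOffline replaces the node's evict taint with the evict-online-pod
        // taint and swaps the online label for the offline label.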
   653  func (t *Tide) changeNodeToOffline(node *corev1.Node, pool NodePoolWrapper) *corev1.Node {
   654  	found := false
   655  	for i := range node.Spec.Taints {
   656  		if node.Spec.Taints[i].Key == pool.GetEvictOnlinePodTaint().Key || node.Spec.Taints[i].Key == pool.GetEvictOfflinePodTaint().Key {
   657  			node.Spec.Taints[i].Key = pool.GetEvictOnlinePodTaint().Key
   658  			node.Spec.Taints[i].Value = pool.GetEvictOnlinePodTaint().Value
   659  			node.Spec.Taints[i].Effect = corev1.TaintEffect(pool.GetEvictOnlinePodTaint().Effect)
   660  			found = true
   661  		}
   662  	}
   663  	if !found {
   664  		node.Spec.Taints = append(node.Spec.Taints, corev1.Taint{
   665  			Key:    pool.GetEvictOnlinePodTaint().Key,
   666  			Value:  pool.GetEvictOnlinePodTaint().Value,
   667  			Effect: corev1.TaintEffect(pool.GetEvictOnlinePodTaint().Effect),
   668  		})
   669  	}
   670  	delete(node.Labels, pool.GetOnlineLabel().Key)
   671  	node.Labels[pool.GetOfflineLabel().Key] = pool.GetOfflineLabel().Value
   672  	return node
   673  }
   674  
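        // getNodeUsageWithSelector collects per-node resource usage for snapshot nodes
        // matching the selector and returns them sorted by aggregated usage, least used
        // first.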
   675  func getNodeUsageWithSelector(
   676  	nodes simulator.ClusterSnapshot,
   677  	resourceNames []corev1.ResourceName,
   678  	selector labels.Selector,
   679  ) ([]NodeUsage, error) {
   680  	var nodeUsageList []NodeUsage
   681  	nodeInfos, err := nodes.NodeInfos().List()
   682  	if err != nil {
   683  		return nil, err
   684  	}
   685  	for _, node := range nodeInfos {
   686  		var pods []*corev1.Pod
   687  		for i := range node.Pods {
   688  			pods = append(pods, node.Pods[i].Pod)
   689  		}
   690  		if !selector.Matches(labels.Set(node.Node().GetLabels())) {
   691  			continue
   692  		}
   693  		nodeUsageList = append(nodeUsageList, NodeUsage{
   694  			node:    node.Node(),
   695  			usage:   nodeutil.NodeUtilization(pods, resourceNames),
   696  			allPods: pods,
   697  		})
   698  	}
   699  	// nodes are sorted by usage rate
   700  	sort.Slice(nodeUsageList, func(i, j int) bool {
   701  		ti := nodeUsageList[i].usage[corev1.ResourceMemory].Value() + nodeUsageList[i].usage[corev1.ResourceCPU].MilliValue() + nodeUsageList[i].usage[corev1.ResourcePods].Value()
   702  		tj := nodeUsageList[j].usage[corev1.ResourceMemory].Value() + nodeUsageList[j].usage[corev1.ResourceCPU].MilliValue() + nodeUsageList[j].usage[corev1.ResourcePods].Value()
   703  		// extended resources
   704  		for name := range nodeUsageList[i].usage {
   705  			if !nodeutil.IsBasicResource(name) {
   706  				ti = ti + nodeUsageList[i].usage[name].Value()
   707  				tj = tj + nodeUsageList[j].usage[name].Value()
   708  			}
   709  		}
   710  		return ti < tj
   711  	})
   712  	return nodeUsageList, nil
   713  }
   714  
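        // periodSync re-enqueues every TideNodePool so pools are reconciled periodically
        // even without node or pool events.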
   715  func (t *Tide) periodSync() {
   716  	targetSelector := labels.Everything()
   717  	tides, err := t.tideLister.List(targetSelector)
   718  	if err != nil {
   719  		klog.Errorf("failed to list all tide node pools: %v", err)
   720  		return
   721  	}
   722  
   723  	for _, tide := range tides {
   724  		t.enqueueWorkItem(tide)
   725  	}
   726  }