github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/vpa/vpa.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vpa
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"sync"
    23  	"time"
    24  
    25  	core "k8s.io/api/core/v1"
    26  	apiequality "k8s.io/apimachinery/pkg/api/equality"
    27  	"k8s.io/apimachinery/pkg/api/errors"
    28  	"k8s.io/apimachinery/pkg/api/meta"
    29  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    30  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    31  	"k8s.io/apimachinery/pkg/labels"
    32  	"k8s.io/apimachinery/pkg/runtime/schema"
    33  	utilerrors "k8s.io/apimachinery/pkg/util/errors"
    34  	"k8s.io/apimachinery/pkg/util/json"
    35  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    36  	"k8s.io/apimachinery/pkg/util/wait"
    37  	corelisters "k8s.io/client-go/listers/core/v1"
    38  	"k8s.io/client-go/tools/cache"
    39  	"k8s.io/client-go/util/workqueue"
    40  	"k8s.io/klog/v2"
    41  
    42  	apis "github.com/kubewharf/katalyst-api/pkg/apis/autoscaling/v1alpha1"
    43  	autoscalelister "github.com/kubewharf/katalyst-api/pkg/client/listers/autoscaling/v1alpha1"
    44  	apiconsts "github.com/kubewharf/katalyst-api/pkg/consts"
    45  	katalyst_base "github.com/kubewharf/katalyst-core/cmd/base"
    46  	"github.com/kubewharf/katalyst-core/pkg/client/control"
    47  	"github.com/kubewharf/katalyst-core/pkg/config/controller"
    48  	"github.com/kubewharf/katalyst-core/pkg/config/generic"
    49  	"github.com/kubewharf/katalyst-core/pkg/consts"
    50  	"github.com/kubewharf/katalyst-core/pkg/controller/vpa/util"
    51  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    52  	katalystutil "github.com/kubewharf/katalyst-core/pkg/util"
    53  	"github.com/kubewharf/katalyst-core/pkg/util/native"
    54  )
    55  
    56  const vpaControllerName = "vpa"
    57  
    58  const (
    59  	metricNameVAPControlVPASync          = "vpa_vpa_sync"
    60  	metricNameVAPControlVPASyncCosts     = "vpa_vpa_sync_costs"
    61  	metricNameVAPControlGetWorkloadCosts = "vpa_vpa_get_workload_costs"
    62  	metricNameVAPControlVPAPatchCosts    = "vpa_vpa_patch_costs"
    63  	metricNameVAPControlSyncPodCosts     = "vpa_vpa_sync_pod_costs"
    64  	metricNameVAPControlVPAUpdateCosts   = "vpa_vpa_update_costs"
    65  
    66  	metricNameVAPControlVPAPodCount = "vpa_pod_count"
    67  )
    68  
    69  // VPAController is responsible to update pod resources according to
    70  // recommended results in vpa status.
    71  //
    72  // although we use informer index mechanism to speed up the looking
    73  // efficiency, we can't assume that all function callers MUST use an
    74  // indexed informer to look up objects.
    75  type VPAController struct {
    76  	ctx  context.Context
    77  	conf *controller.VPAConfig
    78  
    79  	vpaUpdater      control.VPAUpdater
    80  	podUpdater      control.PodUpdater
    81  	workloadControl control.UnstructuredControl
    82  
    83  	vpaIndexer cache.Indexer
    84  	podIndexer cache.Indexer
    85  
    86  	// workloadLister stores all the dynamic informers the controller needs,
    87  	// while vpaEnabledWorkload stores all the workload that be enabled with vpa
    88  	podLister          corelisters.PodLister
    89  	vpaLister          autoscalelister.KatalystVerticalPodAutoscalerLister
    90  	vpaRecLister       autoscalelister.VerticalPodAutoscalerRecommendationLister
    91  	workloadLister     map[schema.GroupVersionKind]cache.GenericLister
    92  	vpaEnabledWorkload map[schema.GroupVersionKind]interface{}
    93  
    94  	syncedFunc []cache.InformerSynced
    95  
    96  	vpaSyncQueue   workqueue.RateLimitingInterface
    97  	vpaSyncWorkers int
    98  
    99  	vpaStatusController *vpaStatusController
   100  
   101  	metricsEmitter metrics.MetricEmitter
   102  }
   103  
   104  func NewVPAController(ctx context.Context, controlCtx *katalyst_base.GenericContext,
   105  	genericConf *generic.GenericConfiguration, _ *controller.GenericControllerConfiguration,
   106  	vpaConf *controller.VPAConfig,
   107  ) (*VPAController, error) {
   108  	podInformer := controlCtx.KubeInformerFactory.Core().V1().Pods()
   109  	vpaInformer := controlCtx.InternalInformerFactory.Autoscaling().V1alpha1().KatalystVerticalPodAutoscalers()
   110  	vpaRecInformer := controlCtx.InternalInformerFactory.Autoscaling().V1alpha1().VerticalPodAutoscalerRecommendations()
   111  
   112  	genericClient := controlCtx.Client
   113  	vpaController := &VPAController{
   114  		ctx:                ctx,
   115  		conf:               vpaConf,
   116  		vpaIndexer:         vpaInformer.Informer().GetIndexer(),
   117  		podIndexer:         podInformer.Informer().GetIndexer(),
   118  		podLister:          podInformer.Lister(),
   119  		vpaLister:          vpaInformer.Lister(),
   120  		vpaRecLister:       vpaRecInformer.Lister(),
   121  		workloadLister:     make(map[schema.GroupVersionKind]cache.GenericLister),
   122  		vpaEnabledWorkload: make(map[schema.GroupVersionKind]interface{}),
   123  		vpaUpdater:         &control.DummyVPAUpdater{},
   124  		podUpdater:         &control.DummyPodUpdater{},
   125  		workloadControl:    &control.DummyUnstructuredControl{},
   126  		vpaSyncQueue:       workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "vpa"),
   127  		vpaSyncWorkers:     vpaConf.VPASyncWorkers,
   128  		syncedFunc: []cache.InformerSynced{
   129  			podInformer.Informer().HasSynced,
   130  			vpaInformer.Informer().HasSynced,
   131  			vpaRecInformer.Informer().HasSynced,
   132  		},
   133  	}
   134  
   135  	workloadInformers := controlCtx.DynamicResourcesManager.GetDynamicInformers()
   136  	for _, wf := range workloadInformers {
   137  		vpaController.workloadLister[wf.GVK] = wf.Informer.Lister()
   138  		vpaController.syncedFunc = append(vpaController.syncedFunc, wf.Informer.Informer().HasSynced)
   139  	}
   140  
   141  	for _, workload := range vpaConf.VPAWorkloadGVResources {
   142  		wf, ok := workloadInformers[workload]
   143  		if !ok {
   144  			klog.Errorf("vpa concerned workload %s not found in dynamic GVR resources", workload)
   145  			continue
   146  		}
   147  
   148  		vpaController.vpaEnabledWorkload[wf.GVK] = struct{}{}
   149  		wf.Informer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   150  			AddFunc:    vpaController.addWorkload,
   151  			UpdateFunc: vpaController.updateWorkload,
   152  		})
   153  	}
   154  
   155  	vpaInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   156  		AddFunc:    vpaController.addVPA,
   157  		UpdateFunc: vpaController.updateVPA,
   158  	})
   159  
   160  	// vpa controller need update pod resource when pod was recreated
   161  	// because vpa pod webhook may not always work
   162  	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   163  		AddFunc: vpaController.addPod,
   164  	})
   165  
   166  	// build indexer: workload --> vpa
   167  	if _, ok := vpaInformer.Informer().GetIndexer().GetIndexers()[consts.TargetReferenceIndex]; !ok {
   168  		err := vpaInformer.Informer().GetIndexer().AddIndexers(cache.Indexers{
   169  			consts.TargetReferenceIndex: katalystutil.VPATargetReferenceIndex,
   170  		})
   171  		if err != nil {
   172  			klog.Errorf("failed to add vpa target reference index: %v", err)
   173  			return nil, err
   174  		}
   175  	}
   176  
   177  	// build index: workload ---> pod
   178  	for _, key := range vpaConf.VPAPodLabelIndexerKeys {
   179  		indexer := native.PodLabelIndexer(key)
   180  		if _, ok := vpaController.podIndexer.GetIndexers()[key]; !ok {
   181  			err := vpaController.podIndexer.AddIndexers(cache.Indexers{
   182  				key: indexer.IndexFunc,
   183  			})
   184  			if err != nil {
   185  				klog.Errorf("[vpa] failed to add label index for pod: %v", err)
   186  				return nil, err
   187  			}
   188  		}
   189  	}
   190  
   191  	vpaController.metricsEmitter = controlCtx.EmitterPool.GetDefaultMetricsEmitter()
   192  	if vpaController.metricsEmitter == nil {
   193  		vpaController.metricsEmitter = metrics.DummyMetrics{}
   194  	}
   195  
   196  	if !genericConf.DryRun {
   197  		vpaController.vpaUpdater = control.NewRealVPAUpdater(genericClient.InternalClient)
   198  		vpaController.podUpdater = control.NewRealPodUpdater(genericClient.KubeClient)
   199  		vpaController.workloadControl = control.NewRealUnstructuredControl(genericClient.DynamicClient)
   200  	}
   201  
   202  	vpaController.vpaStatusController = newVPAStatusController(
   203  		ctx,
   204  		controlCtx,
   205  		vpaConf,
   206  		vpaController.workloadLister,
   207  		vpaController.vpaUpdater,
   208  	)
   209  
   210  	return vpaController, nil
   211  }
   212  
   213  func (vc *VPAController) Run() {
   214  	defer utilruntime.HandleCrash()
   215  	defer vc.vpaSyncQueue.ShutDown()
   216  
   217  	defer klog.Infof("[vpa] shutting down %s controller", vpaControllerName)
   218  
   219  	if !cache.WaitForCacheSync(vc.ctx.Done(), vc.syncedFunc...) {
   220  		utilruntime.HandleError(fmt.Errorf("unable to sync caches for %s controller", vpaControllerName))
   221  		return
   222  	}
   223  
   224  	klog.Infof("[vpa] caches are synced for %s controller", vpaControllerName)
   225  	klog.Infof("[vpa] start %d workers for %s controller", vc.vpaSyncWorkers, vpaControllerName)
   226  
   227  	for i := 0; i < vc.vpaSyncWorkers; i++ {
   228  		go wait.Until(vc.vpaWorker, time.Second, vc.ctx.Done())
   229  	}
   230  	go wait.Until(vc.maintainVPAName, time.Second*10, vc.ctx.Done())
   231  
   232  	// run update vpa status manager.
   233  	go vc.vpaStatusController.run()
   234  
   235  	<-vc.ctx.Done()
   236  }
   237  
   238  func (vc *VPAController) addWorkload(obj interface{}) {
   239  	workload, ok := obj.(*unstructured.Unstructured)
   240  	if !ok {
   241  		klog.Errorf("[vpa] cannot convert obj to *unstructured.Unstructured)")
   242  		return
   243  	}
   244  
   245  	if !katalystutil.CheckWorkloadEnableVPA(workload) {
   246  		return
   247  	}
   248  
   249  	vpa, err := katalystutil.GetVPAForWorkload(workload, vc.vpaIndexer, vc.vpaLister)
   250  	if err != nil {
   251  		klog.Errorf("[vpa] get vpa for workload %v err: %v", workload.GetName(), err)
   252  		return
   253  	}
   254  	vc.enqueueVPA(vpa)
   255  }
   256  
   257  func (vc *VPAController) updateWorkload(_, cur interface{}) {
   258  	vc.addWorkload(cur)
   259  }
   260  
   261  func (vc *VPAController) addVPA(obj interface{}) {
   262  	v, ok := obj.(*apis.KatalystVerticalPodAutoscaler)
   263  	if !ok {
   264  		klog.Errorf("cannot convert obj to *apis.VerticalPodAutoscaler: %v", obj)
   265  		return
   266  	}
   267  
   268  	klog.V(4).Infof("notice addition of VerticalPodAutoscaler %s", v.Name)
   269  	vc.enqueueVPA(v)
   270  }
   271  
   272  func (vc *VPAController) updateVPA(_, cur interface{}) {
   273  	v, ok := cur.(*apis.KatalystVerticalPodAutoscaler)
   274  	if !ok {
   275  		klog.Errorf("cannot convert curObj to *apis.VerticalPodAutoscaler: %v", cur)
   276  		return
   277  	}
   278  
   279  	klog.V(4).Infof("notice update of VerticalPodAutoscaler %s", v.Name)
   280  	vc.enqueueVPA(v)
   281  }
   282  
   283  func (vc *VPAController) addPod(obj interface{}) {
   284  	pod, ok := obj.(*core.Pod)
   285  	if !ok {
   286  		klog.Errorf("cannot convert obj to *core.Pod: %v", obj)
   287  		return
   288  	}
   289  
   290  	vpa, err := katalystutil.GetVPAForPod(pod, vc.vpaIndexer, vc.workloadLister, vc.vpaLister)
   291  	if err != nil {
   292  		klog.V(6).Infof("didn't to find vpa of pod %v/%v, err: %v", pod.Namespace, pod.Name, err)
   293  		return
   294  	}
   295  
   296  	klog.V(6).Infof("notice addition of pod %s", pod.Name)
   297  	vc.enqueueVPA(vpa)
   298  }
   299  
   300  func (vc *VPAController) enqueueVPA(vpa *apis.KatalystVerticalPodAutoscaler) {
   301  	if vpa == nil {
   302  		klog.Warning("trying to enqueueVPA a nil VPA")
   303  		return
   304  	}
   305  
   306  	key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(vpa)
   307  	if err != nil {
   308  		utilruntime.HandleError(fmt.Errorf("couldn't get key for object %#v: %v", vpa, err))
   309  		return
   310  	}
   311  
   312  	vc.vpaSyncQueue.Add(key)
   313  }
   314  
   315  func (vc *VPAController) vpaWorker() {
   316  	for vc.processNextVPA() {
   317  	}
   318  }
   319  
   320  func (vc *VPAController) processNextVPA() bool {
   321  	key, quit := vc.vpaSyncQueue.Get()
   322  	if quit {
   323  		return false
   324  	}
   325  	defer vc.vpaSyncQueue.Done(key)
   326  
   327  	err := vc.syncVPA(key.(string))
   328  	if err == nil {
   329  		vc.vpaSyncQueue.Forget(key)
   330  		return true
   331  	}
   332  
   333  	utilruntime.HandleError(fmt.Errorf("sync %q failed with %v", key, err))
   334  	vc.vpaSyncQueue.AddRateLimited(key)
   335  
   336  	return true
   337  }
   338  
   339  func (vc *VPAController) syncVPA(key string) error {
   340  	namespace, name, err := cache.SplitMetaNamespaceKey(key)
   341  	if err != nil {
   342  		klog.Errorf("[vpa] failed to split namespace and name from key %s", key)
   343  		return err
   344  	}
   345  
   346  	timeSets := make(map[string]time.Time)
   347  	tags := []metrics.MetricTag{
   348  		{Key: "vpa_namespace", Val: namespace},
   349  		{Key: "vpa_name", Val: name},
   350  	}
   351  	_ = vc.metricsEmitter.StoreInt64(metricNameVAPControlVPASync, 1, metrics.MetricTypeNameCount)
   352  
   353  	timeSets[metricNameVAPControlVPASyncCosts] = time.Now()
   354  	defer func() {
   355  		vc.syncPerformance(namespace, name, timeSets, tags)
   356  	}()
   357  
   358  	vpa, err := vc.vpaLister.KatalystVerticalPodAutoscalers(namespace).Get(name)
   359  	if err != nil {
   360  		klog.Errorf("[vpa] vpa %s/%s get error: %v", namespace, name, err)
   361  		if errors.IsNotFound(err) {
   362  			return nil
   363  		}
   364  		return err
   365  	}
   366  
   367  	klog.V(4).Infof("[vpa] syncing vpa %s", vpa.Name)
   368  
   369  	timeSets[metricNameVAPControlGetWorkloadCosts] = time.Now()
   370  	gvk := schema.FromAPIVersionAndKind(vpa.Spec.TargetRef.APIVersion, vpa.Spec.TargetRef.Kind)
   371  	workloadLister, ok := vc.workloadLister[gvk]
   372  	if !ok {
   373  		klog.Errorf("[vpa] vpa %s/%s without workload lister %v", namespace, name, gvk)
   374  		return nil
   375  	}
   376  
   377  	workloadObj, err := katalystutil.GetWorkloadForVPA(vpa, workloadLister)
   378  	if err != nil {
   379  		klog.Errorf("[vpa] vpa %s/%s get workload error: %v", namespace, name, err)
   380  		return err
   381  	}
   382  
   383  	workload := workloadObj.(*unstructured.Unstructured)
   384  	if err := vc.setVPAAnnotations(workload, gvk, vpa.Name); err != nil {
   385  		klog.Errorf("[vpa] set workload %s annotation %v error: %v", workload.GetName(), vpa.Name, err)
   386  		return err
   387  	}
   388  
   389  	timeSets[metricNameVAPControlVPAPatchCosts] = time.Now()
   390  	vpaNew := vpa.DeepCopy()
   391  	if vpa.Annotations[apiconsts.VPAAnnotationWorkloadRetentionPolicyKey] == apiconsts.VPAAnnotationWorkloadRetentionPolicyRetain {
   392  		if err := util.SetOwnerReferencesForVPA(vpaNew, workloadObj); err != nil {
   393  			klog.Errorf("[vpa] vpa %s/%s get workload error: %v", namespace, name, err)
   394  			return err
   395  		}
   396  	} else {
   397  		if err := util.DeleteOwnerReferencesForVPA(vpaNew, workloadObj); err != nil {
   398  			klog.Errorf("[vpa] vpa %s/%s get workload error: %v", namespace, name, err)
   399  			return err
   400  		}
   401  	}
   402  	if _, err := vc.vpaUpdater.PatchVPA(context.TODO(), vpa, vpaNew); err != nil {
   403  		return err
   404  	}
   405  
   406  	timeSets[metricNameVAPControlSyncPodCosts] = time.Now()
   407  	pods, err := katalystutil.GetPodListForVPA(vpa, vc.podIndexer, vc.conf.VPAPodLabelIndexerKeys, workloadLister, vc.podLister)
   408  	if err != nil {
   409  		klog.Errorf("[vpa] failed to get pods by vpa %s, err %v", vpa.Name, err)
   410  		_ = util.UpdateVPAConditions(vc.ctx, vc.vpaUpdater, vpa, apis.RecommendationUpdated, core.ConditionFalse, util.VPAConditionReasonCalculatedIllegal, "failed to find pods")
   411  		return err
   412  	}
   413  	klog.V(4).Infof("[vpa] syncing vpa %s with %d pods", name, len(pods))
   414  	_ = vc.metricsEmitter.StoreInt64(metricNameVAPControlVPAPodCount, int64(len(pods)), metrics.MetricTypeNameRaw, tags...)
   415  
   416  	pods, err = vc.filterPodsByUpdatePolicy(vpa, pods)
   417  	if err != nil {
   418  		klog.Errorf("[vpa] failed to filter pods by vpa %s update policy", vpa.Name)
   419  		_ = util.UpdateVPAConditions(vc.ctx, vc.vpaUpdater, vpa, apis.RecommendationUpdated, core.ConditionFalse, util.VPAConditionReasonCalculatedIllegal, "failed to filter pod")
   420  		return nil
   421  	}
   422  	klog.V(4).Infof("[vpa] syncing vpa %s with filtered %d pods", name, len(pods))
   423  
   424  	timeSets[metricNameVAPControlVPAUpdateCosts] = time.Now()
   425  	if err := vc.updatePodResources(vpa, pods); err != nil {
   426  		return err
   427  	}
   428  
   429  	return nil
   430  }
   431  
   432  func (vc *VPAController) syncPerformance(namespace, name string, times map[string]time.Time, tags []metrics.MetricTag) {
   433  	now := time.Now()
   434  	timeSets := []string{
   435  		metricNameVAPControlVPASyncCosts,
   436  		metricNameVAPControlGetWorkloadCosts,
   437  		metricNameVAPControlVPAPatchCosts,
   438  		metricNameVAPControlSyncPodCosts,
   439  		metricNameVAPControlVPAUpdateCosts,
   440  	}
   441  	for _, timeSet := range timeSets {
   442  		if begin, ok := times[timeSet]; ok {
   443  			costs := now.Sub(begin).Microseconds()
   444  			klog.V(3).Infof("[vpa] [%v/%v] %v costs %v us", namespace, name, timeSet, costs)
   445  			_ = vc.metricsEmitter.StoreInt64(timeSet, costs, metrics.MetricTypeNameRaw, tags...)
   446  		}
   447  	}
   448  }
   449  
   450  // maintainVPAName is mainly responsible to main vpa annotation in workload
   451  func (vc *VPAController) maintainVPAName() {
   452  	for gvk, workloadLister := range vc.workloadLister {
   453  		if _, ok := vc.vpaEnabledWorkload[gvk]; !ok {
   454  			continue
   455  		}
   456  
   457  		workloadList, err := workloadLister.List(labels.Everything())
   458  		if err != nil {
   459  			klog.Errorf("[vpa] list workloads failed: %v", err)
   460  			continue
   461  		}
   462  
   463  		for _, workloadObj := range workloadList {
   464  			needDelete := false
   465  
   466  			workload := workloadObj.(*unstructured.Unstructured)
   467  			vpa, err := katalystutil.GetVPAForWorkload(workload, vc.vpaIndexer, vc.vpaLister)
   468  			if err != nil {
   469  				if errors.IsNotFound(err) {
   470  					needDelete = true
   471  				} else {
   472  					klog.Errorf("[vpa] get vpa for workload %s error: %v", workload.GetName(), err)
   473  				}
   474  			} else if err := vc.setVPAAnnotations(workload, gvk, vpa.Name); err != nil {
   475  				klog.Errorf("[vpa] set vpa name for workload %s error: %v", workload.GetName(), err)
   476  			}
   477  
   478  			if needDelete {
   479  				klog.V(5).Infof("[vpa] delete un-wanted annotation for workload %v", workload.GetName())
   480  				if err := vc.cleanVPAAnnotations(workload, gvk); err != nil {
   481  					klog.Errorf("[vpa] clear vpa name for workload %s error: %v", workload.GetName(), err)
   482  				}
   483  			}
   484  		}
   485  	}
   486  }
   487  
   488  // filterPodsByUpdatePolicy filter out pods which didn't obey vpa update policy
   489  func (vc *VPAController) filterPodsByUpdatePolicy(vpa *apis.KatalystVerticalPodAutoscaler, pods []*core.Pod) ([]*core.Pod, error) {
   490  	if vpa.Spec.UpdatePolicy.PodUpdatingStrategy == apis.PodUpdatingStrategyRecreate {
   491  		return nil, fmt.Errorf("PodUpdatingStrategy mustn't be PodUpdatingStrategyRecreate")
   492  	}
   493  
   494  	remainPods := make([]*core.Pod, 0)
   495  	switch vpa.Spec.UpdatePolicy.PodMatchingStrategy {
   496  	case apis.PodMatchingStrategyAll:
   497  		remainPods = pods
   498  	case apis.PodMatchingStrategyForFreshPod:
   499  		for _, pod := range pods {
   500  			if pod == nil {
   501  				return nil, fmt.Errorf("pod can't be nil")
   502  			}
   503  			if vpa.CreationTimestamp.Before(&pod.CreationTimestamp) {
   504  				remainPods = append(remainPods, pod)
   505  			}
   506  		}
   507  	case apis.PodMatchingStrategyForHistoricalPod:
   508  		for _, pod := range pods {
   509  			if pod == nil {
   510  				return nil, fmt.Errorf("pod can't be nil")
   511  			}
   512  			if pod.CreationTimestamp.Before(&vpa.CreationTimestamp) {
   513  				remainPods = append(remainPods, pod)
   514  			}
   515  		}
   516  	}
   517  	return remainPods, nil
   518  }
   519  
   520  // setVPAAnnotations add vpa name in workload annotations
   521  func (vc *VPAController) setVPAAnnotations(workload *unstructured.Unstructured, gvk schema.GroupVersionKind, vpaName string) error {
   522  	if workload.GetAnnotations()[apiconsts.WorkloadAnnotationVPANameKey] == vpaName {
   523  		return nil
   524  	}
   525  
   526  	workloadCopy := workload.DeepCopy()
   527  	annotations := workloadCopy.GetAnnotations()
   528  	if annotations == nil {
   529  		annotations = make(map[string]string)
   530  	}
   531  	annotations[apiconsts.WorkloadAnnotationVPANameKey] = vpaName
   532  	workloadCopy.SetAnnotations(annotations)
   533  
   534  	gvr, _ := meta.UnsafeGuessKindToResource(gvk)
   535  	workloadGVR := metav1.GroupVersionResource{Version: gvr.Version, Group: gvr.Group, Resource: gvr.Resource}
   536  	_, err := vc.workloadControl.PatchUnstructured(vc.ctx, workloadGVR, workload, workloadCopy)
   537  	if err != nil {
   538  		return err
   539  	}
   540  
   541  	klog.Infof("[vpa] successfully clear annotations for workload %v to %v", workload.GetName(), vpaName)
   542  	return nil
   543  }
   544  
   545  // cleanVPAAnnotations removes vpa name in workload annotations
   546  func (vc *VPAController) cleanVPAAnnotations(workload *unstructured.Unstructured, gvk schema.GroupVersionKind) error {
   547  	if _, ok := workload.GetAnnotations()[apiconsts.WorkloadAnnotationVPANameKey]; !ok {
   548  		return nil
   549  	}
   550  
   551  	workloadCopy := workload.DeepCopy()
   552  	annotations := workloadCopy.GetAnnotations()
   553  	delete(annotations, apiconsts.WorkloadAnnotationVPANameKey)
   554  	workloadCopy.SetAnnotations(annotations)
   555  
   556  	gvr, _ := meta.UnsafeGuessKindToResource(gvk)
   557  	workloadGVR := metav1.GroupVersionResource{Version: gvr.Version, Group: gvr.Group, Resource: gvr.Resource}
   558  	_, err := vc.workloadControl.PatchUnstructured(vc.ctx, workloadGVR, workload, workloadCopy)
   559  	if err != nil {
   560  		return err
   561  	}
   562  
   563  	klog.Infof("[vpa] successfully clear annotations for workload %v", workload.GetName())
   564  	return nil
   565  }
   566  
   567  // updatePodResources updates resource recommendation for each individual pod
   568  func (vc *VPAController) updatePodResources(vpa *apis.KatalystVerticalPodAutoscaler, pods []*core.Pod) error {
   569  	podResources, containerResources, err := katalystutil.GenerateVPAResourceMap(vpa)
   570  	if err != nil {
   571  		return fmt.Errorf("[vpa] failed to get resource from VPA %s", vpa.Name)
   572  	}
   573  
   574  	containerPolicies, err := katalystutil.GenerateVPAPolicyMap(vpa)
   575  	if err != nil {
   576  		return fmt.Errorf("[vpa] get container policy for vpa %s error: %v", vpa.Name, err)
   577  	}
   578  
   579  	// If the PodApplyStrategy is set to 'Pod', the update policy controller will only apply changes to pod-level resources.
   580  	// This approach ensures that when pod-level resources are not specified, they are not unintentionally overridden by the
   581  	// container-level resource definitions.Therefore, if the strategy is 'Pod', we clear the containerResources to prevent
   582  	// any container-specific resource updates.
   583  	if vpa.Spec.UpdatePolicy.PodApplyStrategy == apis.PodApplyStrategyStrategyPod {
   584  		containerResources = nil
   585  	}
   586  
   587  	var mtx sync.Mutex
   588  	var errList []error
   589  	updatePodAnnotations := func(i int) {
   590  		pod := pods[i].DeepCopy()
   591  		err := vc.patchPodResources(vpa, pod, podResources, containerResources, containerPolicies)
   592  		if err != nil {
   593  			mtx.Lock()
   594  			errList = append(errList, err)
   595  			mtx.Unlock()
   596  			return
   597  		}
   598  	}
   599  	workqueue.ParallelizeUntil(vc.ctx, 16, len(pods), updatePodAnnotations)
   600  	if len(errList) > 0 {
   601  		_ = util.UpdateVPAConditions(vc.ctx, vc.vpaUpdater, vpa, apis.RecommendationUpdated, core.ConditionFalse,
   602  			util.VPAConditionReasonCalculatedIllegal, "failed to update pod annotations")
   603  		return utilerrors.NewAggregate(errList)
   604  	}
   605  
   606  	return util.UpdateVPAConditions(vc.ctx, vc.vpaUpdater, vpa, apis.RecommendationUpdated, core.ConditionTrue, util.VPAConditionReasonUpdated, "")
   607  }
   608  
   609  // patchPodResources updates resource recommendation for each individual pod
   610  func (vc *VPAController) patchPodResources(vpa *apis.KatalystVerticalPodAutoscaler, pod *core.Pod,
   611  	podResources map[consts.PodContainerName]apis.ContainerResources, containerResources map[consts.ContainerName]apis.ContainerResources,
   612  	containerPolicies map[string]apis.ContainerResourcePolicy,
   613  ) error {
   614  	annotationResource, err := katalystutil.GenerateVPAPodResizeResourceAnnotations(pod, podResources, containerResources)
   615  	if err != nil {
   616  		return fmt.Errorf("failed to exact pod %v resize resource annotation from container resource: %v", pod.Name, err)
   617  	}
   618  
   619  	marshalledResourceAnnotation, err := json.Marshal(annotationResource)
   620  	if err != nil {
   621  		return err
   622  	}
   623  
   624  	annotationPolicy, err := katalystutil.GenerateVPAPodResizePolicyAnnotations(pod, containerPolicies)
   625  	if err != nil {
   626  		return err
   627  	}
   628  
   629  	marshalledPolicyAnnotation, err := json.Marshal(annotationPolicy)
   630  	if err != nil {
   631  		return err
   632  	}
   633  
   634  	podCopy := pod.DeepCopy()
   635  	if native.PodResourceDiff(pod, annotationResource) {
   636  		if len(annotationResource) > 0 {
   637  			podCopy.Annotations[apiconsts.PodAnnotationInplaceUpdateResourcesKey] = string(marshalledResourceAnnotation)
   638  		} else {
   639  			delete(podCopy.Annotations, apiconsts.PodAnnotationInplaceUpdateResourcesKey)
   640  		}
   641  		if len(annotationPolicy) > 0 {
   642  			podCopy.Annotations[apiconsts.PodAnnotationInplaceUpdateResizePolicyKey] = string(marshalledPolicyAnnotation)
   643  		} else {
   644  			delete(podCopy.Annotations, apiconsts.PodAnnotationInplaceUpdateResizePolicyKey)
   645  		}
   646  	}
   647  
   648  	if !apiequality.Semantic.DeepEqual(pod.Annotations, podCopy.Annotations) {
   649  		podUpdater := vc.podUpdater
   650  		if vpa == nil || vpa.Spec.UpdatePolicy.PodUpdatingStrategy == apis.PodUpdatingStrategyOff {
   651  			podUpdater = &control.DummyPodUpdater{}
   652  			klog.Warning("will not update pod %s/%s due to PodUpdatingStrategy", pod.Namespace, pod.Name)
   653  		}
   654  
   655  		if err := podUpdater.PatchPod(vc.ctx, pod, podCopy); err != nil {
   656  			return err
   657  		}
   658  	} else {
   659  		klog.V(5).Infof("pod %s/%s has no need to update resources", pod.Namespace, pod.Name) //nolint:gomnd
   660  	}
   661  
   662  	return nil
   663  }