github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/vpa/vpa_status.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vpa
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"time"
    23  
    24  	v1 "k8s.io/api/core/v1"
    25  	apiequality "k8s.io/apimachinery/pkg/api/equality"
    26  	"k8s.io/apimachinery/pkg/api/errors"
    27  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    28  	"k8s.io/apimachinery/pkg/runtime/schema"
    29  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    30  	"k8s.io/apimachinery/pkg/util/wait"
    31  	corelisters "k8s.io/client-go/listers/core/v1"
    32  	"k8s.io/client-go/tools/cache"
    33  	"k8s.io/client-go/util/workqueue"
    34  	"k8s.io/klog/v2"
    35  
    36  	apis "github.com/kubewharf/katalyst-api/pkg/apis/autoscaling/v1alpha1"
    37  	autoscalelister "github.com/kubewharf/katalyst-api/pkg/client/listers/autoscaling/v1alpha1"
    38  	katalyst_base "github.com/kubewharf/katalyst-core/cmd/base"
    39  	"github.com/kubewharf/katalyst-core/pkg/client/control"
    40  	"github.com/kubewharf/katalyst-core/pkg/config/controller"
    41  	"github.com/kubewharf/katalyst-core/pkg/controller/vpa/util"
    42  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    43  	katalystutil "github.com/kubewharf/katalyst-core/pkg/util"
    44  	"github.com/kubewharf/katalyst-core/pkg/util/native"
    45  )
    46  
    47  const (
    48  	metricNameVAPControlVPAUpdateStatusCosts = "vpa_vpa_update_resource_costs"
    49  )
    50  
    51  type vpaStatusController struct {
    52  	ctx  context.Context
    53  	conf *controller.VPAConfig
    54  
    55  	vpaIndexer cache.Indexer
    56  	podIndexer cache.Indexer
    57  
    58  	podLister corelisters.PodLister
    59  	vpaLister autoscalelister.KatalystVerticalPodAutoscalerLister
    60  
    61  	syncedFunc           []cache.InformerSynced
    62  	vpaSyncQueue         workqueue.RateLimitingInterface
    63  	vpaStatusSyncWorkers int
    64  
    65  	workloadLister map[schema.GroupVersionKind]cache.GenericLister
    66  
    67  	vpaUpdater control.VPAUpdater
    68  
    69  	metricsEmitter metrics.MetricEmitter
    70  }
    71  
    72  func newVPAStatusController(ctx context.Context, controlCtx *katalyst_base.GenericContext,
    73  	conf *controller.VPAConfig, workloadLister map[schema.GroupVersionKind]cache.GenericLister,
    74  	vpaUpdater control.VPAUpdater,
    75  ) *vpaStatusController {
    76  	podInformer := controlCtx.KubeInformerFactory.Core().V1().Pods()
    77  	vpaInformer := controlCtx.InternalInformerFactory.Autoscaling().V1alpha1().KatalystVerticalPodAutoscalers()
    78  
    79  	c := &vpaStatusController{
    80  		ctx:        ctx,
    81  		conf:       conf,
    82  		vpaIndexer: vpaInformer.Informer().GetIndexer(),
    83  		vpaLister:  vpaInformer.Lister(),
    84  		podIndexer: podInformer.Informer().GetIndexer(),
    85  		podLister:  podInformer.Lister(),
    86  		syncedFunc: []cache.InformerSynced{
    87  			podInformer.Informer().HasSynced,
    88  			vpaInformer.Informer().HasSynced,
    89  		},
    90  		vpaSyncQueue:         workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "vpa-status"),
    91  		vpaStatusSyncWorkers: conf.VPASyncWorkers,
    92  		vpaUpdater:           vpaUpdater,
    93  		workloadLister:       workloadLister,
    94  		metricsEmitter:       controlCtx.EmitterPool.GetDefaultMetricsEmitter().WithTags("vpa-status"),
    95  	}
    96  
    97  	// we need update current container resource to vpa status,
    98  	// so we need watch pod update event (if the in-place updating succeeded)
    99  	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   100  		AddFunc:    c.addPod,
   101  		UpdateFunc: c.updatePod,
   102  	})
   103  
   104  	vpaInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   105  		AddFunc:    c.addVPA,
   106  		UpdateFunc: c.updateVPA,
   107  	})
   108  
   109  	return c
   110  }
   111  
   112  func (vs *vpaStatusController) run() {
   113  	defer utilruntime.HandleCrash()
   114  	defer vs.vpaSyncQueue.ShutDown()
   115  
   116  	defer klog.Infof("[vpa-status] shutting down vpa status collector")
   117  
   118  	if !cache.WaitForCacheSync(vs.ctx.Done(), vs.syncedFunc...) {
   119  		utilruntime.HandleError(fmt.Errorf("unable to sync caches for vpa status collector"))
   120  		return
   121  	}
   122  
   123  	klog.Infof("[vpa-status] caches are synced for vpa status collector")
   124  
   125  	for i := 0; i < vs.vpaStatusSyncWorkers; i++ {
   126  		go wait.Until(vs.vpaWorker, time.Second, vs.ctx.Done())
   127  	}
   128  
   129  	<-vs.ctx.Done()
   130  }
   131  
   132  func (vs *vpaStatusController) addVPA(obj interface{}) {
   133  	v, ok := obj.(*apis.KatalystVerticalPodAutoscaler)
   134  	if !ok {
   135  		klog.Errorf("[vpa-status] cannot convert obj to *apis.VerticalPodAutoscaler: %v", obj)
   136  		return
   137  	}
   138  
   139  	klog.V(6).Infof("[vpa-status] notice addition of VerticalPodAutoscaler %s", v.Name)
   140  	vs.enqueueVPA(v)
   141  }
   142  
   143  func (vs *vpaStatusController) updateVPA(old, cur interface{}) {
   144  	oldVPA, ok := old.(*apis.KatalystVerticalPodAutoscaler)
   145  	if !ok {
   146  		klog.Errorf("[vpa-status] cannot convert oldObj to *apis.VerticalPodAutoscaler: %v", old)
   147  		return
   148  	}
   149  
   150  	curVPA, ok := cur.(*apis.KatalystVerticalPodAutoscaler)
   151  	if !ok {
   152  		klog.Errorf("[vpa-status] cannot convert curObj to *apis.VerticalPodAutoscaler: %v", cur)
   153  		return
   154  	}
   155  
   156  	if apiequality.Semantic.DeepEqual(oldVPA.Status, curVPA.Status) {
   157  		return
   158  	}
   159  
   160  	klog.V(6).Infof("[vpa-status] notice update of vpa %s", native.GenerateUniqObjectNameKey(curVPA))
   161  	vs.enqueueVPA(curVPA)
   162  }
   163  
   164  func (vs *vpaStatusController) addPod(obj interface{}) {
   165  	pod, ok := obj.(*v1.Pod)
   166  	if !ok {
   167  		klog.Errorf("[vpa-status] cannot convert obj to *core.Pod: %v", obj)
   168  		return
   169  	}
   170  
   171  	vpa, err := katalystutil.GetVPAForPod(pod, vs.vpaIndexer, vs.workloadLister, vs.vpaLister)
   172  	if err != nil {
   173  		klog.V(6).Infof("[vpa-status] didn't to find vpa of pod %s, err: %v", native.GenerateUniqObjectNameKey(pod), err)
   174  		return
   175  	}
   176  
   177  	klog.V(6).Infof("[vpa-status] notice addition of pod %s", native.GenerateUniqObjectNameKey(pod))
   178  	vs.enqueueVPA(vpa)
   179  }
   180  
   181  func (vs *vpaStatusController) updatePod(old interface{}, cur interface{}) {
   182  	oldPod, ok := old.(*v1.Pod)
   183  	if !ok {
   184  		klog.Errorf("[vpa-status] cannot convert obj to *core.Pod: %v", cur)
   185  		return
   186  	}
   187  
   188  	curPod, ok := cur.(*v1.Pod)
   189  	if !ok {
   190  		klog.Errorf("[vpa-status] cannot convert obj to *core.Pod: %v", cur)
   191  		return
   192  	}
   193  
   194  	// only when pod status or spec.containers has been changed, in-place update resource may be completed,
   195  	// so it only enqueue vpa to collect vpa status when they are different with old one
   196  	if apiequality.Semantic.DeepEqual(oldPod.Status, curPod.Status) &&
   197  		apiequality.Semantic.DeepEqual(oldPod.Spec.Containers, curPod.Spec.Containers) {
   198  		return
   199  	}
   200  
   201  	vpa, err := katalystutil.GetVPAForPod(curPod, vs.vpaIndexer, vs.workloadLister, vs.vpaLister)
   202  	if err != nil {
   203  		klog.V(6).Infof("[vpa-status] didn't to find vpa of pod %s, err: %v",
   204  			native.GenerateUniqObjectNameKey(curPod), err)
   205  		return
   206  	}
   207  
   208  	klog.V(6).Infof("[vpa-status] notice update of pod %s", native.GenerateUniqObjectNameKey(curPod))
   209  	vs.enqueueVPA(vpa)
   210  }
   211  
   212  func (vs *vpaStatusController) enqueueVPA(vpa *apis.KatalystVerticalPodAutoscaler) {
   213  	if vpa == nil {
   214  		klog.Warning("[vpa-status] trying to enqueueVPA a nil VPA")
   215  		return
   216  	}
   217  
   218  	key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(vpa)
   219  	if err != nil {
   220  		utilruntime.HandleError(fmt.Errorf("couldn't get key for object %#v: %v", vpa, err))
   221  		return
   222  	}
   223  
   224  	vs.vpaSyncQueue.Add(key)
   225  }
   226  
   227  func (vs *vpaStatusController) vpaWorker() {
   228  	for vs.processNextVPA() {
   229  	}
   230  }
   231  
   232  func (vs *vpaStatusController) processNextVPA() bool {
   233  	key, quit := vs.vpaSyncQueue.Get()
   234  	if quit {
   235  		return false
   236  	}
   237  	defer vs.vpaSyncQueue.Done(key)
   238  
   239  	err := vs.syncVPA(key.(string))
   240  	if err == nil {
   241  		vs.vpaSyncQueue.Forget(key)
   242  		return true
   243  	}
   244  
   245  	utilruntime.HandleError(fmt.Errorf("sync %q failed with %v", key, err))
   246  	vs.vpaSyncQueue.AddRateLimited(key)
   247  
   248  	return true
   249  }
   250  
   251  func (vs *vpaStatusController) syncVPA(key string) error {
   252  	namespace, name, err := cache.SplitMetaNamespaceKey(key)
   253  	if err != nil {
   254  		klog.Errorf("[vpa-status] failed to split namespace and name from key %s", key)
   255  		return err
   256  	}
   257  
   258  	begin := time.Now()
   259  	defer func() {
   260  		now := time.Now()
   261  		costs := now.Sub(begin).Microseconds()
   262  		klog.V(3).Infof("[vpa-status] [%v/%v] %v costs %v us", namespace, name, metricNameVAPControlVPAUpdateStatusCosts, costs)
   263  		_ = vs.metricsEmitter.StoreInt64(metricNameVAPControlVPAUpdateStatusCosts, costs, metrics.MetricTypeNameRaw, []metrics.MetricTag{
   264  			{Key: "vpa_namespace", Val: namespace},
   265  			{Key: "vpa_name", Val: name},
   266  		}...)
   267  	}()
   268  
   269  	vpa, err := vs.vpaLister.KatalystVerticalPodAutoscalers(namespace).Get(name)
   270  	if err != nil {
   271  		klog.Errorf("[vpa-status] vpa %s/%s get error: %v", namespace, name, err)
   272  		if errors.IsNotFound(err) {
   273  			return nil
   274  		}
   275  		return err
   276  	}
   277  
   278  	gvk := schema.FromAPIVersionAndKind(vpa.Spec.TargetRef.APIVersion, vpa.Spec.TargetRef.Kind)
   279  	workloadLister, ok := vs.workloadLister[gvk]
   280  	if !ok {
   281  		klog.Errorf("[vpa-status] vpa %s/%s without workload lister %v", namespace, name, gvk)
   282  		return nil
   283  	}
   284  
   285  	pods, err := katalystutil.GetPodListForVPA(vpa, vs.podIndexer, vs.conf.VPAPodLabelIndexerKeys, workloadLister, vs.podLister)
   286  	if err != nil {
   287  		klog.Errorf("[vpa-status] failed to get pods by vpa %s, err %v", vpa.Name, err)
   288  		return err
   289  	}
   290  
   291  	// get pod resources and container resources according to current pods
   292  	vpaPodResources, vpaContainerResources, err := util.GetVPAResourceStatusWithCurrent(vpa, pods)
   293  	if err != nil {
   294  		klog.Errorf("[vpa-status] get vpa status with current pods err: %v", err)
   295  		return err
   296  	}
   297  
   298  	vpaNew := vpa.DeepCopy()
   299  	vpaNew.Status.PodResources = vpaPodResources
   300  	vpaNew.Status.ContainerResources = vpaContainerResources
   301  
   302  	// set RecommendationApplied condition, based on whether all pods for this vpa
   303  	// are updated to the expected resources in their annotations
   304  	err = vs.setRecommendationAppliedCondition(vpaNew, pods)
   305  	if err != nil {
   306  		klog.Errorf("[vpa-status] set recommendation applied condition failed: %v", err)
   307  		return err
   308  	}
   309  
   310  	// skip to update status if no change happened
   311  	if apiequality.Semantic.DeepEqual(vpa.Status, vpaNew.Status) {
   312  		return nil
   313  	}
   314  
   315  	_, err = vs.vpaUpdater.UpdateVPAStatus(vs.ctx, vpaNew, metav1.UpdateOptions{})
   316  	if err != nil {
   317  		klog.Errorf("[vpa-status] update vpa status err: %v", err)
   318  		return err
   319  	}
   320  
   321  	return nil
   322  }
   323  
   324  // setRecommendationAppliedCondition set vpa recommendation applied condition by checking all pods whether
   325  // are updated to the expected resources in their annotations
   326  func (vs *vpaStatusController) setRecommendationAppliedCondition(vpa *apis.KatalystVerticalPodAutoscaler, pods []*v1.Pod) error {
   327  	failedCount := 0
   328  	for _, pod := range pods {
   329  		if !katalystutil.CheckPodSpecUpdated(pod) {
   330  			failedCount += 1
   331  		}
   332  	}
   333  
   334  	if failedCount == 0 {
   335  		err := util.SetVPAConditions(vpa, apis.RecommendationApplied, v1.ConditionTrue, util.VPAConditionReasonPodSpecUpdated, "")
   336  		if err != nil {
   337  			return err
   338  		}
   339  	} else {
   340  		msg := fmt.Sprintf("failed to update %d pods, total %d pods", failedCount, len(pods))
   341  		err := util.SetVPAConditions(vpa, apis.RecommendationApplied, v1.ConditionFalse, util.VPAConditionReasonPodSpecNoUpdate, msg)
   342  		if err != nil {
   343  			return err
   344  		}
   345  	}
   346  	return nil
   347  }