github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/vpa/recommend.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vpa
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"time"
    23  
    24  	"k8s.io/apimachinery/pkg/api/errors"
    25  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    26  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    27  	"k8s.io/apimachinery/pkg/labels"
    28  	"k8s.io/apimachinery/pkg/runtime/schema"
    29  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    30  	"k8s.io/apimachinery/pkg/util/wait"
    31  	coreListers "k8s.io/client-go/listers/core/v1"
    32  	"k8s.io/client-go/tools/cache"
    33  	"k8s.io/client-go/util/workqueue"
    34  	"k8s.io/klog/v2"
    35  
    36  	apis "github.com/kubewharf/katalyst-api/pkg/apis/autoscaling/v1alpha1"
    37  	autoscalelister "github.com/kubewharf/katalyst-api/pkg/client/listers/autoscaling/v1alpha1"
    38  	workloadlister "github.com/kubewharf/katalyst-api/pkg/client/listers/workload/v1alpha1"
    39  	katalystbase "github.com/kubewharf/katalyst-core/cmd/base"
    40  	"github.com/kubewharf/katalyst-core/pkg/client/control"
    41  	"github.com/kubewharf/katalyst-core/pkg/config/controller"
    42  	"github.com/kubewharf/katalyst-core/pkg/config/generic"
    43  	"github.com/kubewharf/katalyst-core/pkg/consts"
    44  	"github.com/kubewharf/katalyst-core/pkg/controller/vpa/algorithm"
    45  	"github.com/kubewharf/katalyst-core/pkg/controller/vpa/algorithm/recommenders"
    46  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    47  	katalystutil "github.com/kubewharf/katalyst-core/pkg/util"
    48  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    49  	"github.com/kubewharf/katalyst-core/pkg/util/native"
    50  )
    51  
    52  const resourceRecommendControllerName = "resourceRecommend"
    53  
    54  const metricNameRecommendControlVPASyncCosts = "res_rec_vpa_sync_costs"
    55  
    56  // rs stores all the in-tree recommendation algorithm implementations
    57  var rs = []algorithm.ResourceRecommender{
    58  	recommenders.NewCPURecommender(),
    59  }
    60  
    61  func init() {
    62  	for _, r := range rs {
    63  		algorithm.RegisterRecommender(r)
    64  	}
    65  }
    66  
    67  // ResourceRecommendController is responsible to use in-tree algorithm implementations
    68  // to export those recommended results to vpa-rec according to vpa config.
    69  //
    70  // although we use informer index mechanism to speed up the looking
    71  // efficiency, we can't assume that all function callers MUST use an
    72  // indexed informer to look up objects.
    73  type ResourceRecommendController struct {
    74  	ctx  context.Context
    75  	conf *controller.VPAConfig
    76  
    77  	vpaUpdater    control.VPAUpdater
    78  	vpaRecUpdater control.VPARecommendationUpdater
    79  
    80  	spdIndexer    cache.Indexer
    81  	vpaRecIndexer cache.Indexer
    82  	podIndexer    cache.Indexer
    83  
    84  	podLister      coreListers.PodLister
    85  	spdLister      workloadlister.ServiceProfileDescriptorLister
    86  	vpaLister      autoscalelister.KatalystVerticalPodAutoscalerLister
    87  	vpaRecLister   autoscalelister.VerticalPodAutoscalerRecommendationLister
    88  	workloadLister map[schema.GroupVersionKind]cache.GenericLister
    89  
    90  	syncedFunc []cache.InformerSynced
    91  	vpaQueue   workqueue.RateLimitingInterface
    92  
    93  	metricsEmitter metrics.MetricEmitter
    94  
    95  	vpaSyncWorkers int
    96  }
    97  
    98  func NewResourceRecommendController(ctx context.Context, controlCtx *katalystbase.GenericContext,
    99  	genericConf *generic.GenericConfiguration, _ *controller.GenericControllerConfiguration,
   100  	config *controller.VPAConfig,
   101  ) (*ResourceRecommendController, error) {
   102  	if controlCtx == nil {
   103  		return nil, fmt.Errorf("controlCtx is invalid")
   104  	}
   105  
   106  	podInformer := controlCtx.KubeInformerFactory.Core().V1().Pods()
   107  	spdInformer := controlCtx.InternalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors()
   108  	vpaInformer := controlCtx.InternalInformerFactory.Autoscaling().V1alpha1().KatalystVerticalPodAutoscalers()
   109  	vpaRecInformer := controlCtx.InternalInformerFactory.Autoscaling().V1alpha1().VerticalPodAutoscalerRecommendations()
   110  
   111  	genericClient := controlCtx.Client
   112  	recController := &ResourceRecommendController{
   113  		ctx:            ctx,
   114  		conf:           config,
   115  		vpaUpdater:     &control.DummyVPAUpdater{},
   116  		vpaRecUpdater:  &control.DummyVPARecommendationUpdater{},
   117  		spdIndexer:     spdInformer.Informer().GetIndexer(),
   118  		vpaRecIndexer:  vpaRecInformer.Informer().GetIndexer(),
   119  		podIndexer:     podInformer.Informer().GetIndexer(),
   120  		podLister:      podInformer.Lister(),
   121  		spdLister:      spdInformer.Lister(),
   122  		vpaLister:      vpaInformer.Lister(),
   123  		vpaRecLister:   vpaRecInformer.Lister(),
   124  		workloadLister: make(map[schema.GroupVersionKind]cache.GenericLister),
   125  		vpaQueue:       workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "vpa"),
   126  		syncedFunc: []cache.InformerSynced{
   127  			podInformer.Informer().HasSynced,
   128  			spdInformer.Informer().HasSynced,
   129  			vpaInformer.Informer().HasSynced,
   130  			vpaRecInformer.Informer().HasSynced,
   131  		},
   132  		vpaSyncWorkers: config.VPASyncWorkers,
   133  	}
   134  
   135  	for _, wf := range controlCtx.DynamicResourcesManager.GetDynamicInformers() {
   136  		recController.workloadLister[wf.GVK] = wf.Informer.Lister()
   137  		recController.syncedFunc = append(recController.syncedFunc, wf.Informer.Informer().HasSynced)
   138  	}
   139  
   140  	klog.Infof("vpa resync period %v", config.VPAReSyncPeriod)
   141  
   142  	vpaInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{
   143  		AddFunc:    recController.addVPA,
   144  		UpdateFunc: recController.updateVPA,
   145  	}, config.VPAReSyncPeriod)
   146  
   147  	// build index: workload ---> spd
   148  	if _, ok := spdInformer.Informer().GetIndexer().GetIndexers()[consts.TargetReferenceIndex]; !ok {
   149  		err := spdInformer.Informer().GetIndexer().AddIndexers(cache.Indexers{
   150  			consts.TargetReferenceIndex: katalystutil.SPDTargetReferenceIndex,
   151  		})
   152  		if err != nil {
   153  			klog.Errorf("failed to add spd target reference index: %v", err)
   154  			return nil, err
   155  		}
   156  	}
   157  
   158  	// build index: vpa ---> vpaRec
   159  	if _, ok := vpaRecInformer.Informer().GetIndexer().GetIndexers()[consts.OwnerReferenceIndex]; !ok {
   160  		err := vpaRecInformer.Informer().GetIndexer().AddIndexers(cache.Indexers{
   161  			consts.OwnerReferenceIndex: native.ObjectOwnerReferenceIndex,
   162  		})
   163  		if err != nil {
   164  			klog.Errorf("[vpa-rec] failed to add owner vpa index: %v", err)
   165  			return nil, err
   166  		}
   167  	}
   168  
   169  	// build index: workload ---> pod
   170  	for _, key := range config.VPAPodLabelIndexerKeys {
   171  		indexer := native.PodLabelIndexer(key)
   172  		if _, ok := recController.podIndexer.GetIndexers()[key]; !ok {
   173  			err := recController.podIndexer.AddIndexers(cache.Indexers{
   174  				key: indexer.IndexFunc,
   175  			})
   176  			if err != nil {
   177  				klog.Errorf("[vpa-rec] failed to add label index for pod: %v", err)
   178  				return nil, err
   179  			}
   180  		}
   181  	}
   182  
   183  	recController.metricsEmitter = controlCtx.EmitterPool.GetDefaultMetricsEmitter()
   184  	if recController.metricsEmitter == nil {
   185  		recController.metricsEmitter = metrics.DummyMetrics{}
   186  	}
   187  
   188  	if !genericConf.DryRun {
   189  		recController.vpaUpdater = control.NewRealVPAUpdater(genericClient.InternalClient)
   190  		recController.vpaRecUpdater = control.NewRealVPARecommendationUpdater(genericClient.InternalClient)
   191  	}
   192  
   193  	return recController, nil
   194  }
   195  
   196  func (rrc *ResourceRecommendController) Run() {
   197  	defer utilruntime.HandleCrash()
   198  	defer rrc.vpaQueue.ShutDown()
   199  
   200  	defer klog.Infof("[resource-rec] shutting down %s controller", resourceRecommendControllerName)
   201  
   202  	if !cache.WaitForCacheSync(rrc.ctx.Done(), rrc.syncedFunc...) {
   203  		utilruntime.HandleError(fmt.Errorf("unable to sync caches for %s controller", resourceRecommendControllerName))
   204  		return
   205  	}
   206  	klog.Infof("[resource-rec] caches are synced for %s controller", resourceRecommendControllerName)
   207  	klog.Infof("[resource-rec] start %d workers for %s controller", rrc.vpaSyncWorkers, resourceRecommendControllerName)
   208  
   209  	for i := 0; i < rrc.vpaSyncWorkers; i++ {
   210  		go wait.Until(rrc.vpaWorker, time.Second, rrc.ctx.Done())
   211  	}
   212  
   213  	<-rrc.ctx.Done()
   214  }
   215  
   216  func (rrc *ResourceRecommendController) addVPA(obj interface{}) {
   217  	v, ok := obj.(*apis.KatalystVerticalPodAutoscaler)
   218  	if !ok {
   219  		klog.Errorf("[resource-rec] cannot convert obj to *apis.VerticalPodAutoscaler: %v", obj)
   220  		return
   221  	}
   222  
   223  	klog.V(4).Infof("[resource-rec] notice addition of vpa %s", v.Name)
   224  	rrc.enqueueVPA(v)
   225  }
   226  
   227  func (rrc *ResourceRecommendController) updateVPA(_, cur interface{}) {
   228  	v, ok := cur.(*apis.KatalystVerticalPodAutoscaler)
   229  	if !ok {
   230  		klog.Errorf("[resource-rec] cannot convert curObj to *apis.VerticalPodAutoscaler: %v", cur)
   231  		return
   232  	}
   233  
   234  	klog.V(4).Infof("[resource-rec] notice update of vpa %s", v.Name)
   235  	rrc.enqueueVPA(v)
   236  }
   237  
   238  func (rrc *ResourceRecommendController) enqueueVPA(vpa *apis.KatalystVerticalPodAutoscaler) {
   239  	key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(vpa)
   240  	if err != nil {
   241  		utilruntime.HandleError(fmt.Errorf("couldn't get key for object %#v: %v", vpa, err))
   242  		return
   243  	}
   244  	rrc.vpaQueue.Add(key)
   245  }
   246  
   247  func (rrc *ResourceRecommendController) vpaWorker() {
   248  	for rrc.processNextVPA() {
   249  	}
   250  }
   251  
   252  func (rrc *ResourceRecommendController) processNextVPA() bool {
   253  	key, quit := rrc.vpaQueue.Get()
   254  	if quit {
   255  		return false
   256  	}
   257  	defer rrc.vpaQueue.Done(key)
   258  
   259  	err := rrc.syncVPA(key.(string))
   260  	if err == nil {
   261  		rrc.vpaQueue.Forget(key)
   262  		return true
   263  	}
   264  
   265  	utilruntime.HandleError(fmt.Errorf("sync %q failed with %v", key, err))
   266  	rrc.vpaQueue.AddRateLimited(key)
   267  
   268  	return true
   269  }
   270  
   271  // syncVPA is mainly responsible to calculate resource recommendation for each vpa (with
   272  // recommender setting as in-tree algorithms); since we will re-sync periodicallly, we
   273  // won't return error in this function.
   274  func (rrc *ResourceRecommendController) syncVPA(key string) error {
   275  	namespace, name, err := cache.SplitMetaNamespaceKey(key)
   276  	if err != nil {
   277  		klog.Errorf("[resource-rec] failed to split namespace and name from key %s", key)
   278  		return err
   279  	}
   280  
   281  	begin := time.Now()
   282  	defer func() {
   283  		costs := time.Since(begin).Microseconds()
   284  		klog.Infof("[resource-rec] syncing vpa [%v/%v] costs %v us", namespace, name, costs)
   285  		_ = rrc.metricsEmitter.StoreInt64(metricNameRecommendControlVPASyncCosts, costs, metrics.MetricTypeNameRaw,
   286  			metrics.MetricTag{Key: "vpa_namespace", Val: namespace},
   287  			metrics.MetricTag{Key: "vpa_name", Val: name},
   288  		)
   289  	}()
   290  
   291  	vpa, err := rrc.vpaLister.KatalystVerticalPodAutoscalers(namespace).Get(name)
   292  	if err != nil {
   293  		if errors.IsNotFound(err) {
   294  			klog.Warningf("[resource-rec] vpa %s/%s is not found", namespace, name)
   295  			return nil
   296  		}
   297  
   298  		klog.Errorf("[resource-rec] vpa %s/%s get error: %v", namespace, name, err)
   299  		return nil
   300  	}
   301  	klog.V(4).Infof("[resource-rec] syncing vpa %s", vpa.Name)
   302  
   303  	gvk := schema.FromAPIVersionAndKind(vpa.Spec.TargetRef.APIVersion, vpa.Spec.TargetRef.Kind)
   304  	workloadLister, ok := rrc.workloadLister[gvk]
   305  	if !ok {
   306  		klog.Errorf("[resource-rec] vpa %s/%s without workload lister", namespace, name)
   307  		return nil
   308  	}
   309  
   310  	recommender := vpa.Spec.ResourcePolicy.AlgorithmPolicy.Recommender
   311  	r, ok := algorithm.GetRecommender()[recommender]
   312  	if !ok {
   313  		klog.V(8).ErrorS(nil, fmt.Sprintf("[resource-rec] recommender %v not supported", recommender))
   314  		return nil
   315  	}
   316  
   317  	pods, err := katalystutil.GetPodListForVPA(vpa, rrc.podIndexer, rrc.conf.VPAPodLabelIndexerKeys, workloadLister, rrc.podLister)
   318  	if err != nil {
   319  		klog.Errorf("[resource-rec] get pods for vpa %s/%s error: %v", namespace, name, err)
   320  		return nil
   321  	}
   322  
   323  	spd, err := katalystutil.GetSPDForVPA(vpa, rrc.spdIndexer, workloadLister, rrc.spdLister)
   324  	if err != nil {
   325  		klog.Warningf("[resource-rec] get spd for vpa %s/%s error: %v", namespace, name, err)
   326  		return nil
   327  	}
   328  
   329  	podResources, containerResources, err := r.GetRecommendedPodResources(spd, pods)
   330  	if err != nil {
   331  		klog.Errorf("[resource-rec] calculate resources for vpa %s/%s error: %v", namespace, name, err)
   332  		return nil
   333  	}
   334  
   335  	vpaRec, err := rrc.getOrCreateVpaRec(vpa)
   336  	if err != nil {
   337  		klog.Errorf("[resource-rec] get vpaRec for vpa %s/%s error: %v", namespace, name, err)
   338  		return nil
   339  	}
   340  
   341  	vpaRecNew := vpaRec.DeepCopy()
   342  	vpaRecNew.Spec.PodRecommendations = podResources
   343  	vpaRecNew.Spec.ContainerRecommendations = containerResources
   344  	err = rrc.vpaRecUpdater.PatchVPARecommendation(rrc.ctx, vpaRec, vpaRecNew)
   345  	if err != nil {
   346  		klog.Errorf("[resource-rec] get vpaRec for vpa %s/%s error: %v", namespace, name, err)
   347  		return nil
   348  	}
   349  	return nil
   350  }
   351  
   352  // cleanVPARec is mainly responsible to clean all vpaRec CR that should not exist
   353  func (rrc *ResourceRecommendController) cleanVPARec() {
   354  	recList, err := rrc.vpaRecLister.List(labels.Everything())
   355  	if err != nil {
   356  		klog.Errorf("[resource-rec] failed to list all vpaRec: %v", err)
   357  	}
   358  
   359  	for _, vpaRec := range recList {
   360  		needDelete := false
   361  		vpa, err := katalystutil.GetVPAForVPARec(vpaRec, rrc.vpaLister)
   362  		if err != nil {
   363  			if errors.IsNotFound(err) {
   364  				needDelete = true
   365  			} else {
   366  				klog.Errorf("[resource-rec] get vpa for vpaRec %s error: %v", vpaRec.Name, err)
   367  			}
   368  		} else {
   369  			// delete vpa-rec if the recommender field has already erased
   370  			recommender := vpa.Spec.ResourcePolicy.AlgorithmPolicy.Recommender
   371  			if recommender == "" {
   372  				needDelete = true
   373  			}
   374  		}
   375  
   376  		if needDelete {
   377  			klog.Warningf("[resource-rec] delete un-wanted vpaRec %v", vpaRec.Name)
   378  			if err := rrc.vpaRecUpdater.DeleteVPARecommendation(rrc.ctx, vpaRec, metav1.DeleteOptions{}); err != nil {
   379  				klog.Warningf("[resource-rec] delete un-wanted vpaRec %v err: %v", vpaRec.Name, err)
   380  			}
   381  		}
   382  	}
   383  }
   384  
   385  // getOrCreateVpaRec is used to main the in-tree vpaRec objects if it doesn't exist
   386  func (rrc *ResourceRecommendController) getOrCreateVpaRec(vpa *apis.KatalystVerticalPodAutoscaler) (*apis.VerticalPodAutoscalerRecommendation, error) {
   387  	vpaRec, err := katalystutil.GetVPARecForVPA(vpa, rrc.vpaRecIndexer, rrc.vpaRecLister)
   388  	if err != nil {
   389  		if !apierrors.IsNotFound(err) {
   390  			return nil, err
   391  		}
   392  	} else {
   393  		return vpaRec, nil
   394  	}
   395  
   396  	klog.Errorf("[resource-rec] create vpaRec for vpa %s/%s", vpa.Namespace, vpa.Name)
   397  	ownerRef := metav1.OwnerReference{
   398  		Name:       vpa.GetName(),
   399  		Kind:       vpa.GroupVersionKind().Kind,
   400  		APIVersion: vpa.GroupVersionKind().GroupVersion().String(),
   401  		UID:        vpa.UID,
   402  	}
   403  	vpaRec = &apis.VerticalPodAutoscalerRecommendation{
   404  		ObjectMeta: metav1.ObjectMeta{
   405  			OwnerReferences: []metav1.OwnerReference{ownerRef},
   406  			Namespace:       vpa.GetNamespace(),
   407  			Name:            vpa.GetName(),
   408  			Labels:          general.DeepCopyMap(vpa.GetLabels()),
   409  		},
   410  		Spec: apis.VerticalPodAutoscalerRecommendationSpec{},
   411  	}
   412  	return rrc.vpaRecUpdater.CreateVPARecommendation(rrc.ctx, vpaRec, metav1.CreateOptions{})
   413  }