github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/spd/spd.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package spd
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"sync"
    23  	"time"
    24  
    25  	core "k8s.io/api/core/v1"
    26  	"k8s.io/apimachinery/pkg/api/errors"
    27  	"k8s.io/apimachinery/pkg/api/meta"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    30  	"k8s.io/apimachinery/pkg/labels"
    31  	"k8s.io/apimachinery/pkg/runtime/schema"
    32  	utilerrors "k8s.io/apimachinery/pkg/util/errors"
    33  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    34  	"k8s.io/apimachinery/pkg/util/wait"
    35  	corelisters "k8s.io/client-go/listers/core/v1"
    36  	"k8s.io/client-go/tools/cache"
    37  	"k8s.io/client-go/util/workqueue"
    38  	"k8s.io/klog/v2"
    39  	"k8s.io/utils/pointer"
    40  
    41  	"github.com/kubewharf/katalyst-api/pkg/apis/autoscaling/v1alpha1"
    42  	apiworkload "github.com/kubewharf/katalyst-api/pkg/apis/workload/v1alpha1"
    43  	apiListers "github.com/kubewharf/katalyst-api/pkg/client/listers/workload/v1alpha1"
    44  	apiconsts "github.com/kubewharf/katalyst-api/pkg/consts"
    45  	katalystbase "github.com/kubewharf/katalyst-core/cmd/base"
    46  	"github.com/kubewharf/katalyst-core/pkg/client/control"
    47  	"github.com/kubewharf/katalyst-core/pkg/config/controller"
    48  	"github.com/kubewharf/katalyst-core/pkg/config/generic"
    49  	"github.com/kubewharf/katalyst-core/pkg/consts"
    50  	indicator_plugin "github.com/kubewharf/katalyst-core/pkg/controller/spd/indicator-plugin"
    51  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    52  	"github.com/kubewharf/katalyst-core/pkg/util"
    53  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    54  	"github.com/kubewharf/katalyst-core/pkg/util/native"
    55  )
    56  
    57  const spdControllerName = "spd"
    58  
    59  const (
    60  	workloadWorkerCount        = 1
    61  	spdWorkerCount             = 1
    62  	indicatorSpecWorkerCount   = 1
    63  	indicatorStatusWorkerCount = 1
    64  )
    65  
// SPDController is responsible to maintain lifecycle of SPD CR,
// and sync and store the data represented in SPD.
//
// although we use informer index mechanism to speed up the looking
// efficiency, we can't assume that all function callers MUST use an
// indexed informer to look up objects.
type SPDController struct {
	// ctx bounds the lifetime of every worker started by Run and is passed
	// to all API calls issued by the controller.
	ctx       context.Context
	conf      *controller.SPDConfig
	qosConfig *generic.QoSConfiguration

	// write-side clients; they start as dummy implementations and are
	// replaced with real ones by NewSPDController unless DryRun is set.
	podUpdater      control.PodUpdater
	spdControl      control.ServiceProfileControl
	workloadControl control.UnstructuredControl
	cncControl      control.CNCControl

	// spdIndexer carries the target-reference index, podIndexer carries the
	// label indexes listed in conf.SPDPodLabelIndexerKeys.
	spdIndexer cache.Indexer
	podIndexer cache.Indexer

	podLister corelisters.PodLister
	spdLister apiListers.ServiceProfileDescriptorLister
	// workloadGVKLister and workloadLister hold the listers of every dynamic
	// informer, keyed by GVK and GVR respectively.
	workloadGVKLister map[schema.GroupVersionKind]cache.GenericLister
	workloadLister    map[schema.GroupVersionResource]cache.GenericLister
	// spdWorkloadInformer only contains the informers of the workloads
	// listed in conf.SPDWorkloadGVResources.
	spdWorkloadInformer map[schema.GroupVersionResource]native.DynamicInformer

	// syncedFunc collects the HasSynced funcs Run waits on before starting workers.
	syncedFunc        []cache.InformerSynced
	spdQueue          workqueue.RateLimitingInterface
	workloadSyncQueue workqueue.RateLimitingInterface

	metricsEmitter metrics.MetricEmitter

	cncCacheController *cncCacheController

	// indicatorPlugins is keyed by plugin name; the indicators* maps are used
	// as sets (values are struct{}{}) of the indicator names the initialized
	// plugins declare support for.
	indicatorManager         *indicator_plugin.IndicatorManager
	indicatorPlugins         map[string]indicator_plugin.IndicatorPlugin
	indicatorsSpecBusiness   map[apiworkload.ServiceBusinessIndicatorName]interface{}
	indicatorsSpecExtended   map[string]interface{}
	indicatorsSpecSystem     map[apiworkload.ServiceSystemIndicatorName]interface{}
	indicatorsStatusBusiness map[apiworkload.ServiceBusinessIndicatorName]interface{}
}
   106  
   107  func NewSPDController(ctx context.Context, controlCtx *katalystbase.GenericContext,
   108  	genericConf *generic.GenericConfiguration, _ *controller.GenericControllerConfiguration,
   109  	conf *controller.SPDConfig, qosConfig *generic.QoSConfiguration, extraConf interface{},
   110  ) (*SPDController, error) {
   111  	if conf == nil || controlCtx.Client == nil || genericConf == nil {
   112  		return nil, fmt.Errorf("client, conf and generalConf can't be nil")
   113  	}
   114  
   115  	podInformer := controlCtx.KubeInformerFactory.Core().V1().Pods()
   116  	spdInformer := controlCtx.InternalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors()
   117  	cncInformer := controlCtx.InternalInformerFactory.Config().V1alpha1().CustomNodeConfigs()
   118  
   119  	spdController := &SPDController{
   120  		ctx:                 ctx,
   121  		conf:                conf,
   122  		qosConfig:           qosConfig,
   123  		podUpdater:          &control.DummyPodUpdater{},
   124  		spdControl:          &control.DummySPDControl{},
   125  		workloadControl:     &control.DummyUnstructuredControl{},
   126  		spdQueue:            workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "spd"),
   127  		workloadSyncQueue:   workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "workload"),
   128  		metricsEmitter:      controlCtx.EmitterPool.GetDefaultMetricsEmitter().WithTags(spdControllerName),
   129  		workloadGVKLister:   make(map[schema.GroupVersionKind]cache.GenericLister),
   130  		workloadLister:      make(map[schema.GroupVersionResource]cache.GenericLister),
   131  		spdWorkloadInformer: make(map[schema.GroupVersionResource]native.DynamicInformer),
   132  	}
   133  
   134  	spdController.podLister = podInformer.Lister()
   135  	spdController.syncedFunc = append(spdController.syncedFunc, podInformer.Informer().HasSynced)
   136  
   137  	spdController.spdLister = spdInformer.Lister()
   138  	spdController.syncedFunc = append(spdController.syncedFunc, spdInformer.Informer().HasSynced)
   139  
   140  	workloadInformers := controlCtx.DynamicResourcesManager.GetDynamicInformers()
   141  	for _, wf := range workloadInformers {
   142  		spdController.workloadGVKLister[wf.GVK] = wf.Informer.Lister()
   143  		spdController.workloadLister[wf.GVR] = wf.Informer.Lister()
   144  		spdController.syncedFunc = append(spdController.syncedFunc, wf.Informer.Informer().HasSynced)
   145  	}
   146  
   147  	for _, workload := range conf.SPDWorkloadGVResources {
   148  		wf, ok := workloadInformers[workload]
   149  		if !ok {
   150  			klog.Errorf("spd concerned workload %s not found in dynamic GVR resources", workload)
   151  			continue
   152  		}
   153  
   154  		spdController.spdWorkloadInformer[wf.GVR] = wf
   155  		wf.Informer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   156  			AddFunc:    spdController.addWorkload(workload),
   157  			UpdateFunc: spdController.updateWorkload(workload),
   158  		})
   159  	}
   160  
   161  	spdInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{
   162  		AddFunc:    spdController.addSPD,
   163  		UpdateFunc: spdController.updateSPD,
   164  	}, conf.ReSyncPeriod)
   165  
   166  	// build index: workload ---> spd
   167  	spdController.spdIndexer = spdInformer.Informer().GetIndexer()
   168  	if _, exist := spdController.spdIndexer.GetIndexers()[consts.TargetReferenceIndex]; !exist {
   169  		err := spdController.spdIndexer.AddIndexers(cache.Indexers{
   170  			consts.TargetReferenceIndex: util.SPDTargetReferenceIndex,
   171  		})
   172  		if err != nil {
   173  			klog.Errorf("[spd] failed to add target reference index for spd: %v", err)
   174  			return nil, err
   175  		}
   176  	}
   177  
   178  	// build index: workload ---> pod
   179  	spdController.podIndexer = podInformer.Informer().GetIndexer()
   180  	for _, key := range conf.SPDPodLabelIndexerKeys {
   181  		indexer := native.PodLabelIndexer(key)
   182  		if _, ok := spdController.podIndexer.GetIndexers()[key]; !ok {
   183  			err := spdController.podIndexer.AddIndexers(cache.Indexers{
   184  				key: indexer.IndexFunc,
   185  			})
   186  			if err != nil {
   187  				klog.Errorf("[spd] failed to add label index for pod: %v", err)
   188  				return nil, err
   189  			}
   190  		}
   191  	}
   192  
   193  	// spd controller need watch pod create and delete to update its spd baseline percentile key
   194  	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   195  		AddFunc:    spdController.addPod,
   196  		UpdateFunc: spdController.updatePod,
   197  		DeleteFunc: spdController.deletePod,
   198  	})
   199  
   200  	if !genericConf.DryRun {
   201  		spdController.podUpdater = control.NewRealPodUpdater(controlCtx.Client.KubeClient)
   202  		spdController.spdControl = control.NewSPDControlImp(controlCtx.Client.InternalClient)
   203  		spdController.workloadControl = control.NewRealUnstructuredControl(controlCtx.Client.DynamicClient)
   204  		spdController.cncControl = control.NewRealCNCControl(controlCtx.Client.InternalClient)
   205  	}
   206  
   207  	var err error
   208  	spdController.cncCacheController, err = newCNCCacheController(ctx, podInformer,
   209  		cncInformer, spdInformer, spdController.workloadGVKLister, spdController.workloadLister,
   210  		spdController.cncControl, spdController.metricsEmitter, conf)
   211  	if err != nil {
   212  		return nil, err
   213  	}
   214  
   215  	if err := spdController.initializeIndicatorPlugins(controlCtx, extraConf); err != nil {
   216  		return nil, err
   217  	}
   218  
   219  	native.SetPodTransformer(podTransformerFunc)
   220  	return spdController, nil
   221  }
   222  
   223  func (sc *SPDController) Run() {
   224  	defer utilruntime.HandleCrash()
   225  	defer sc.workloadSyncQueue.ShutDown()
   226  	defer sc.spdQueue.ShutDown()
   227  	defer klog.Infof("shutting down %s controller", spdControllerName)
   228  
   229  	if !cache.WaitForCacheSync(sc.ctx.Done(), sc.syncedFunc...) {
   230  		utilruntime.HandleError(fmt.Errorf("unable to sync caches for %s controller", spdControllerName))
   231  		return
   232  	}
   233  	klog.Infof("caches are synced for %s controller", spdControllerName)
   234  
   235  	for i := 0; i < workloadWorkerCount; i++ {
   236  		go wait.Until(sc.workloadWorker, time.Second, sc.ctx.Done())
   237  	}
   238  	for i := 0; i < spdWorkerCount; i++ {
   239  		go wait.Until(sc.spdWorker, time.Second, sc.ctx.Done())
   240  	}
   241  	go wait.Until(sc.cleanSPD, time.Minute*5, sc.ctx.Done())
   242  
   243  	go sc.cncCacheController.Run()
   244  	for _, plugin := range sc.indicatorPlugins {
   245  		go plugin.Run()
   246  	}
   247  	for i := 0; i < indicatorSpecWorkerCount; i++ {
   248  		go wait.Until(sc.syncIndicatorSpec, time.Second, sc.ctx.Done())
   249  	}
   250  	for i := 0; i < indicatorStatusWorkerCount; i++ {
   251  		go wait.Until(sc.syncIndicatorStatus, time.Second, sc.ctx.Done())
   252  	}
   253  
   254  	<-sc.ctx.Done()
   255  }
   256  
   257  func (sc *SPDController) GetIndicatorPlugins() (plugins []indicator_plugin.IndicatorPlugin) {
   258  	for _, p := range sc.indicatorPlugins {
   259  		plugins = append(plugins, p)
   260  	}
   261  	return plugins
   262  }
   263  
   264  func (sc *SPDController) initializeIndicatorPlugins(controlCtx *katalystbase.GenericContext, extraConf interface{}) error {
   265  	sc.indicatorManager = indicator_plugin.NewIndicatorManager()
   266  	sc.indicatorPlugins = make(map[string]indicator_plugin.IndicatorPlugin)
   267  	sc.indicatorsSpecBusiness = make(map[apiworkload.ServiceBusinessIndicatorName]interface{})
   268  	sc.indicatorsSpecSystem = make(map[apiworkload.ServiceSystemIndicatorName]interface{})
   269  	sc.indicatorsSpecExtended = make(map[string]interface{})
   270  	sc.indicatorsStatusBusiness = make(map[apiworkload.ServiceBusinessIndicatorName]interface{})
   271  
   272  	initializers := indicator_plugin.GetPluginInitializers()
   273  	for _, pluginName := range sc.conf.IndicatorPlugins {
   274  		if initFunc, ok := initializers[pluginName]; ok {
   275  			plugin, err := initFunc(sc.ctx, sc.conf, extraConf, sc.spdWorkloadInformer,
   276  				controlCtx, sc.indicatorManager)
   277  			if err != nil {
   278  				return err
   279  			}
   280  
   281  			general.InfoS("indicator initialized", "plugin", pluginName)
   282  			sc.indicatorPlugins[pluginName] = plugin
   283  			for _, name := range plugin.GetSupportedBusinessIndicatorSpec() {
   284  				sc.indicatorsSpecBusiness[name] = struct{}{}
   285  			}
   286  			for _, name := range plugin.GetSupportedSystemIndicatorSpec() {
   287  				sc.indicatorsSpecSystem[name] = struct{}{}
   288  			}
   289  			for _, name := range plugin.GetSupportedExtendedIndicatorSpec() {
   290  				sc.indicatorsSpecExtended[name] = struct{}{}
   291  			}
   292  			for _, name := range plugin.GetSupportedBusinessIndicatorStatus() {
   293  				sc.indicatorsStatusBusiness[name] = struct{}{}
   294  			}
   295  		}
   296  	}
   297  
   298  	return nil
   299  }
   300  
   301  func (sc *SPDController) addWorkload(workloadGVR string) func(obj interface{}) {
   302  	return func(obj interface{}) {
   303  		workload, ok := obj.(metav1.Object)
   304  		if !ok {
   305  			klog.Errorf("[spd] cannot convert obj to metav1.Object")
   306  			return
   307  		}
   308  		sc.enqueueWorkload(workloadGVR, workload)
   309  	}
   310  }
   311  
   312  func (sc *SPDController) updateWorkload(workloadGVR string) func(oldObj, newObj interface{}) {
   313  	return func(_, cur interface{}) {
   314  		workload, ok := cur.(metav1.Object)
   315  		if !ok {
   316  			klog.Errorf("[spd] cannot convert cur obj to metav1.Object")
   317  			return
   318  		}
   319  		sc.enqueueWorkload(workloadGVR, workload)
   320  	}
   321  }
   322  
   323  func (sc *SPDController) enqueueWorkload(workloadGVR string, workload metav1.Object) {
   324  	if workload == nil {
   325  		klog.Warning("[spd] trying to enqueue a nil spd")
   326  		return
   327  	}
   328  
   329  	key, err := native.GenerateUniqGVRNameKey(workloadGVR, workload)
   330  	if err != nil {
   331  		utilruntime.HandleError(err)
   332  		return
   333  	}
   334  
   335  	sc.workloadSyncQueue.Add(key)
   336  }
   337  
   338  func (sc *SPDController) workloadWorker() {
   339  	for sc.processNextWorkload() {
   340  	}
   341  }
   342  
   343  func (sc *SPDController) processNextWorkload() bool {
   344  	key, quit := sc.workloadSyncQueue.Get()
   345  	if quit {
   346  		return false
   347  	}
   348  	defer sc.workloadSyncQueue.Done(key)
   349  
   350  	err := sc.syncWorkload(key.(string))
   351  	if err == nil {
   352  		sc.workloadSyncQueue.Forget(key)
   353  		return true
   354  	}
   355  
   356  	utilruntime.HandleError(fmt.Errorf("sync %q failed with %v", key, err))
   357  	sc.workloadSyncQueue.AddRateLimited(key)
   358  
   359  	return true
   360  }
   361  
   362  // syncWorkload is mainly responsible to maintain the lifecycle of spd for each
   363  // workload, without handling the service profile calculation logic.
   364  func (sc *SPDController) syncWorkload(key string) error {
   365  	klog.V(5).Infof("[spd] syncing workload [%v]", key)
   366  	workloadGVR, namespace, name, err := native.ParseUniqGVRNameKey(key)
   367  	if err != nil {
   368  		klog.Errorf("[spd] failed to parse key %s to workload", key)
   369  		return err
   370  	}
   371  
   372  	gvr, _ := schema.ParseResourceArg(workloadGVR)
   373  	if gvr == nil {
   374  		err = fmt.Errorf("[spd] ParseResourceArg worload %v failed", workloadGVR)
   375  		klog.Error(err)
   376  		return err
   377  	}
   378  
   379  	workload, err := sc.getWorkload(*gvr, namespace, name)
   380  	if err != nil {
   381  		klog.Errorf("[spd] failed to get workload %s/%s", namespace, name)
   382  		if errors.IsNotFound(err) {
   383  			return nil
   384  		}
   385  		return err
   386  	}
   387  
   388  	podList, err := native.GetPodListForWorkload(workload, sc.podIndexer, sc.conf.SPDPodLabelIndexerKeys, sc.podLister)
   389  	if err != nil {
   390  		klog.Errorf("[spd] get pod list for workload %s/%s failed: %v", namespace, name, err)
   391  		return err
   392  	}
   393  
   394  	if !util.WorkloadSPDEnabled(workload) {
   395  		if err := sc.cleanPodListSPDAnnotation(podList); err != nil {
   396  			klog.Errorf("[spd] clear pod list annotations for workload %s/%s failed: %v", namespace, name, err)
   397  			return err
   398  		}
   399  		return nil
   400  	}
   401  
   402  	spd, err := sc.getOrCreateSPDForWorkload(workload)
   403  	if err != nil {
   404  		klog.Errorf("[spd] get or create spd for workload %s/%s failed: %v", namespace, name, err)
   405  		return err
   406  	}
   407  
   408  	if err := sc.setPodListSPDAnnotation(podList, spd.Name); err != nil {
   409  		klog.Errorf("[spd] set pod list annotations for workload %s/%s failed: %v", namespace, name, err)
   410  		return err
   411  	}
   412  	return nil
   413  }
   414  
   415  func (sc *SPDController) addSPD(obj interface{}) {
   416  	spd, ok := obj.(*apiworkload.ServiceProfileDescriptor)
   417  	if !ok {
   418  		klog.Errorf("[spd] cannot convert obj to *apiworkload.ServiceProfileDescriptor")
   419  		return
   420  	}
   421  	sc.enqueueSPD(spd)
   422  }
   423  
   424  func (sc *SPDController) updateSPD(_, newObj interface{}) {
   425  	spd, ok := newObj.(*apiworkload.ServiceProfileDescriptor)
   426  	if !ok {
   427  		klog.Errorf("[spd] cannot convert obj to *apiworkload.ServiceProfileDescriptor")
   428  		return
   429  	}
   430  	sc.enqueueSPD(spd)
   431  }
   432  
   433  func (sc *SPDController) enqueueSPD(spd *apiworkload.ServiceProfileDescriptor) {
   434  	if spd == nil {
   435  		klog.Warning("[spd] trying to enqueue a nil spd")
   436  		return
   437  	}
   438  
   439  	key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(spd)
   440  	if err != nil {
   441  		utilruntime.HandleError(fmt.Errorf("[spd] couldn't get key for workload %#v: %v", spd, err))
   442  		return
   443  	}
   444  
   445  	sc.spdQueue.Add(key)
   446  }
   447  
   448  func (sc *SPDController) spdWorker() {
   449  	for sc.processNextSPD() {
   450  	}
   451  }
   452  
   453  func (sc *SPDController) processNextSPD() bool {
   454  	key, quit := sc.spdQueue.Get()
   455  	if quit {
   456  		return false
   457  	}
   458  	defer sc.spdQueue.Done(key)
   459  
   460  	err := sc.syncSPD(key.(string))
   461  	if err == nil {
   462  		sc.spdQueue.Forget(key)
   463  		return true
   464  	}
   465  
   466  	utilruntime.HandleError(fmt.Errorf("sync %q failed with %v", key, err))
   467  	sc.spdQueue.AddRateLimited(key)
   468  
   469  	return true
   470  }
   471  
   472  // syncSPD is mainly responsible to handle the service profile calculation logic for each
   473  // spd existed, and it will always assume that all spd is valid.
   474  func (sc *SPDController) syncSPD(key string) error {
   475  	klog.V(5).Infof("[spd] syncing spd [%v]", key)
   476  	namespace, name, err := cache.SplitMetaNamespaceKey(key)
   477  	if err != nil {
   478  		klog.Errorf("[spd] failed to split namespace and name from spd key %s", key)
   479  		return err
   480  	}
   481  
   482  	spd, err := sc.spdLister.ServiceProfileDescriptors(namespace).Get(name)
   483  	if err != nil {
   484  		klog.Errorf("[spd] failed to get spd [%v]", key)
   485  		if errors.IsNotFound(err) {
   486  			return nil
   487  		}
   488  		return err
   489  	}
   490  
   491  	// update baseline percentile
   492  	newSPD := spd.DeepCopy()
   493  	err = sc.updateSPDAnnotations(newSPD)
   494  	if err != nil {
   495  		return err
   496  	}
   497  
   498  	_, err = sc.spdControl.PatchSPD(sc.ctx, spd, newSPD)
   499  	if err != nil {
   500  		return err
   501  	}
   502  
   503  	return nil
   504  }
   505  
   506  // cleanSPD is mainly responsible to clean all spd CR that should not exist if its workload
   507  // is deleted or no longer enabled with service profiling logic.
   508  func (sc *SPDController) cleanSPD() {
   509  	spdList, err := sc.spdLister.List(labels.Everything())
   510  	if err != nil {
   511  		klog.Errorf("[spd] failed to list all spd: %v", err)
   512  	}
   513  
   514  	for _, spd := range spdList {
   515  		gvr, _ := meta.UnsafeGuessKindToResource(schema.FromAPIVersionAndKind(spd.Spec.TargetRef.APIVersion, spd.Spec.TargetRef.Kind))
   516  		workloadLister, ok := sc.workloadLister[gvr]
   517  		if !ok {
   518  			klog.Errorf("[spd] spd %s without workload lister", spd.Name)
   519  			continue
   520  		}
   521  
   522  		needDelete := false
   523  		workloadObj, err := util.GetWorkloadForSPD(spd, workloadLister)
   524  		if err != nil {
   525  			if errors.IsNotFound(err) {
   526  				needDelete = true
   527  			} else {
   528  				klog.Errorf("[spd] get workload for spd %s error: %v", spd.Name, err)
   529  			}
   530  		} else {
   531  			workload := workloadObj.(*unstructured.Unstructured)
   532  			if !util.WorkloadSPDEnabled(workload) {
   533  				needDelete = true
   534  
   535  				klog.Warningf("[spd] clear un-wanted spd annotation %v for workload %v", spd.Name, workload.GetName())
   536  			}
   537  		}
   538  
   539  		if needDelete {
   540  			klog.V(5).Infof("[spd] delete un-wanted spd %v", spd.Name)
   541  			if err := sc.spdControl.DeleteSPD(sc.ctx, spd, metav1.DeleteOptions{}); err != nil {
   542  				klog.Warningf("[spd] delete un-wanted spd %v err: %v", spd.Name, err)
   543  			}
   544  		}
   545  	}
   546  }
   547  
   548  // getWorkload is used to get workload info from dynamic lister according to the given GVR
   549  func (sc *SPDController) getWorkload(gvr schema.GroupVersionResource, namespace, name string) (*unstructured.Unstructured, error) {
   550  	if _, ok := sc.workloadLister[gvr]; !ok {
   551  		return nil, fmt.Errorf("can't find gvr %s from listers", gvr.String())
   552  	}
   553  
   554  	workloadLister := sc.workloadLister[gvr]
   555  	workloadObj, err := workloadLister.ByNamespace(namespace).Get(name)
   556  	if err != nil {
   557  		return nil, err
   558  	}
   559  
   560  	workload, ok := workloadObj.(*unstructured.Unstructured)
   561  	if !ok {
   562  		return nil, fmt.Errorf("failed to convert workload to *unstructured.Unstructured")
   563  	}
   564  
   565  	return workload, nil
   566  }
   567  
   568  // defaultBaselinePercent returns default baseline ratio based on the qos level of workload,
   569  // and if the configured data cannot be found, we will return 100,
   570  // which signifies that the resources of this workload cannot be reclaimed to reclaimed_cores.
   571  func (sc *SPDController) defaultBaselinePercent(workload *unstructured.Unstructured) *int32 {
   572  	podTemplateSpec, err := native.GetUnstructuredPodTemplateSpec(workload)
   573  	if err != nil {
   574  		general.ErrorS(err, "failed to GetUnstructuredPodTemplate")
   575  		return pointer.Int32(100)
   576  	}
   577  
   578  	pod := &core.Pod{
   579  		ObjectMeta: podTemplateSpec.ObjectMeta,
   580  		Spec:       podTemplateSpec.Spec,
   581  	}
   582  
   583  	qosLevel, err := sc.qosConfig.GetQoSLevel(pod, podTemplateSpec.Annotations)
   584  	if err != nil {
   585  		general.ErrorS(err, "failed to GetQoSLevel")
   586  		return pointer.Int32(100)
   587  	}
   588  	baselinePercent, ok := sc.conf.BaselinePercent[qosLevel]
   589  	if !ok {
   590  		general.InfoS("failed to get default baseline percent", "qosLevel", qosLevel)
   591  		return pointer.Int32(100)
   592  	}
   593  	return pointer.Int32(int32(baselinePercent))
   594  }
   595  
   596  // getOrCreateSPDForWorkload get workload's spd or create one if the spd doesn't exist
   597  func (sc *SPDController) getOrCreateSPDForWorkload(workload *unstructured.Unstructured) (*apiworkload.ServiceProfileDescriptor, error) {
   598  	gvk := workload.GroupVersionKind()
   599  	ownerRef := metav1.OwnerReference{
   600  		Name:       workload.GetName(),
   601  		Kind:       gvk.Kind,
   602  		APIVersion: gvk.GroupVersion().String(),
   603  		UID:        workload.GetUID(),
   604  	}
   605  
   606  	spd, err := util.GetSPDForWorkload(workload, sc.spdIndexer, sc.spdLister)
   607  	if err != nil {
   608  		if errors.IsNotFound(err) {
   609  			spd := &apiworkload.ServiceProfileDescriptor{
   610  				ObjectMeta: metav1.ObjectMeta{
   611  					Name:            workload.GetName(),
   612  					Namespace:       workload.GetNamespace(),
   613  					OwnerReferences: []metav1.OwnerReference{ownerRef},
   614  					Labels:          workload.GetLabels(),
   615  				},
   616  				Spec: apiworkload.ServiceProfileDescriptorSpec{
   617  					TargetRef: v1alpha1.CrossVersionObjectReference{
   618  						Name:       ownerRef.Name,
   619  						Kind:       ownerRef.Kind,
   620  						APIVersion: ownerRef.APIVersion,
   621  					},
   622  					BaselinePercent: sc.defaultBaselinePercent(workload),
   623  				},
   624  				Status: apiworkload.ServiceProfileDescriptorStatus{
   625  					AggMetrics: []apiworkload.AggPodMetrics{},
   626  				},
   627  			}
   628  
   629  			err := sc.updateSPDAnnotations(spd)
   630  			if err != nil {
   631  				return nil, err
   632  			}
   633  
   634  			return sc.spdControl.CreateSPD(sc.ctx, spd, metav1.CreateOptions{})
   635  		}
   636  
   637  		return nil, err
   638  	}
   639  
   640  	return spd, nil
   641  }
   642  
   643  func (sc *SPDController) setPodListSPDAnnotation(podList []*core.Pod, spdName string) error {
   644  	var mtx sync.Mutex
   645  	var errList []error
   646  	setPodAnnotations := func(i int) {
   647  		err := sc.setPodSPDAnnotation(podList[i], spdName)
   648  		if err != nil {
   649  			mtx.Lock()
   650  			errList = append(errList, err)
   651  			mtx.Unlock()
   652  			return
   653  		}
   654  	}
   655  	workqueue.ParallelizeUntil(sc.ctx, 16, len(podList), setPodAnnotations)
   656  	if len(errList) > 0 {
   657  		err := utilerrors.NewAggregate(errList)
   658  		klog.Errorf(err.Error())
   659  		return err
   660  	}
   661  
   662  	return nil
   663  }
   664  
   665  // setPodSPDAnnotation add spd name in pod annotations
   666  func (sc *SPDController) setPodSPDAnnotation(pod *core.Pod, spdName string) error {
   667  	if pod.GetAnnotations()[apiconsts.PodAnnotationSPDNameKey] == spdName {
   668  		return nil
   669  	}
   670  
   671  	podCopy := pod.DeepCopy()
   672  	annotations := podCopy.GetAnnotations()
   673  	if annotations == nil {
   674  		annotations = make(map[string]string)
   675  	}
   676  	annotations[apiconsts.PodAnnotationSPDNameKey] = spdName
   677  	podCopy.SetAnnotations(annotations)
   678  
   679  	err := sc.podUpdater.PatchPod(sc.ctx, pod, podCopy)
   680  	if err != nil {
   681  		return err
   682  	}
   683  
   684  	klog.Infof("[spd] successfully set annotations for pod %v to %v", pod.GetName(), spdName)
   685  	return nil
   686  }
   687  
   688  func (sc *SPDController) cleanPodListSPDAnnotation(podList []*core.Pod) error {
   689  	var mtx sync.Mutex
   690  	var errList []error
   691  	setPodAnnotations := func(i int) {
   692  		err := sc.cleanPodSPDAnnotation(podList[i])
   693  		if err != nil {
   694  			mtx.Lock()
   695  			errList = append(errList, err)
   696  			mtx.Unlock()
   697  			return
   698  		}
   699  	}
   700  	workqueue.ParallelizeUntil(sc.ctx, 16, len(podList), setPodAnnotations)
   701  	if len(errList) > 0 {
   702  		err := utilerrors.NewAggregate(errList)
   703  		klog.Errorf(err.Error())
   704  		return err
   705  	}
   706  
   707  	return nil
   708  }
   709  
   710  // cleanPodSPDAnnotation removes pod name in workload annotations
   711  func (sc *SPDController) cleanPodSPDAnnotation(pod *core.Pod) error {
   712  	if _, ok := pod.GetAnnotations()[apiconsts.PodAnnotationSPDNameKey]; !ok {
   713  		return nil
   714  	}
   715  
   716  	podCopy := pod.DeepCopy()
   717  	annotations := podCopy.GetAnnotations()
   718  	delete(annotations, apiconsts.PodAnnotationSPDNameKey)
   719  	podCopy.SetAnnotations(annotations)
   720  
   721  	err := sc.podUpdater.PatchPod(sc.ctx, pod, podCopy)
   722  	if err != nil {
   723  		return err
   724  	}
   725  
   726  	klog.Infof("[spd] successfully clear annotations for pod %v", pod.GetName())
   727  	return nil
   728  }
   729  
   730  func (sc *SPDController) addPod(obj interface{}) {
   731  	pod, ok := obj.(*core.Pod)
   732  	if !ok {
   733  		klog.Errorf("[spd] cannot convert obj to *core.Pod")
   734  		return
   735  	}
   736  	sc.enqueuePod(pod)
   737  }
   738  
   739  func (sc *SPDController) deletePod(obj interface{}) {
   740  	pod, ok := obj.(*core.Pod)
   741  	if !ok {
   742  		klog.Errorf("[spd] cannot convert obj to *core.Pod")
   743  		return
   744  	}
   745  	sc.enqueuePod(pod)
   746  }
   747  
   748  func (sc *SPDController) updatePod(_ interface{}, newObj interface{}) {
   749  	pod, ok := newObj.(*core.Pod)
   750  	if !ok {
   751  		klog.Errorf("[spd] cannot convert obj to *core.Pod")
   752  		return
   753  	}
   754  	sc.enqueuePod(pod)
   755  }
   756  
   757  func (sc *SPDController) enqueuePod(pod *core.Pod) {
   758  	name, err := util.GetPodSPDName(pod.ObjectMeta)
   759  	if err != nil {
   760  		return
   761  	}
   762  
   763  	spd, err := sc.spdLister.ServiceProfileDescriptors(pod.Namespace).Get(name)
   764  	if err != nil {
   765  		return
   766  	}
   767  	sc.enqueueSPD(spd)
   768  }
   769  
   770  func (sc *SPDController) updateSPDAnnotations(spd *apiworkload.ServiceProfileDescriptor) error {
   771  	err := sc.updateBaselineSentinel(spd)
   772  	if err != nil {
   773  		return err
   774  	}
   775  
   776  	err = sc.updateHash(spd)
   777  	if err != nil {
   778  		return err
   779  	}
   780  
   781  	return nil
   782  }
   783  
   784  func (sc *SPDController) updateHash(spd *apiworkload.ServiceProfileDescriptor) error {
   785  	hash, err := util.CalculateSPDHash(spd)
   786  	if err != nil {
   787  		return err
   788  	}
   789  
   790  	util.SetSPDHash(spd, hash)
   791  	return nil
   792  }
   793  
   794  func podTransformerFunc(src, dest *core.Pod) {
   795  	dest.Spec.NodeName = src.Spec.NodeName
   796  	dest.Status.Phase = src.Status.Phase
   797  	containerStatusesTransformerFunc(&src.Status.ContainerStatuses, &dest.Status.ContainerStatuses)
   798  }
   799  
   800  func containerStatusesTransformerFunc(src, dst *[]core.ContainerStatus) {
   801  	if src == nil || len(*src) == 0 {
   802  		return
   803  	}
   804  
   805  	if len(*dst) == 0 {
   806  		*dst = make([]core.ContainerStatus, len(*src))
   807  	}
   808  
   809  	for i, c := range *src {
   810  		(*dst)[i].State = c.State
   811  	}
   812  }