github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/spd/spd.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package spd 18 19 import ( 20 "context" 21 "fmt" 22 "sync" 23 "time" 24 25 core "k8s.io/api/core/v1" 26 "k8s.io/apimachinery/pkg/api/errors" 27 "k8s.io/apimachinery/pkg/api/meta" 28 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 30 "k8s.io/apimachinery/pkg/labels" 31 "k8s.io/apimachinery/pkg/runtime/schema" 32 utilerrors "k8s.io/apimachinery/pkg/util/errors" 33 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 34 "k8s.io/apimachinery/pkg/util/wait" 35 corelisters "k8s.io/client-go/listers/core/v1" 36 "k8s.io/client-go/tools/cache" 37 "k8s.io/client-go/util/workqueue" 38 "k8s.io/klog/v2" 39 "k8s.io/utils/pointer" 40 41 "github.com/kubewharf/katalyst-api/pkg/apis/autoscaling/v1alpha1" 42 apiworkload "github.com/kubewharf/katalyst-api/pkg/apis/workload/v1alpha1" 43 apiListers "github.com/kubewharf/katalyst-api/pkg/client/listers/workload/v1alpha1" 44 apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" 45 katalystbase "github.com/kubewharf/katalyst-core/cmd/base" 46 "github.com/kubewharf/katalyst-core/pkg/client/control" 47 "github.com/kubewharf/katalyst-core/pkg/config/controller" 48 "github.com/kubewharf/katalyst-core/pkg/config/generic" 49 "github.com/kubewharf/katalyst-core/pkg/consts" 50 indicator_plugin "github.com/kubewharf/katalyst-core/pkg/controller/spd/indicator-plugin" 51 "github.com/kubewharf/katalyst-core/pkg/metrics" 52 "github.com/kubewharf/katalyst-core/pkg/util" 53 "github.com/kubewharf/katalyst-core/pkg/util/general" 54 "github.com/kubewharf/katalyst-core/pkg/util/native" 55 ) 56 57 const spdControllerName = "spd" 58 59 const ( 60 workloadWorkerCount = 1 61 spdWorkerCount = 1 62 indicatorSpecWorkerCount = 1 63 indicatorStatusWorkerCount = 1 64 ) 65 66 // SPDController is responsible to maintain lifecycle of SPD CR, 67 // and sync and store the data represented in SPD. 68 // 69 // although we use informer index mechanism to speed up the looking 70 // efficiency, we can't assume that all function callers MUST use an 71 // indexed informer to look up objects. 72 type SPDController struct { 73 ctx context.Context 74 conf *controller.SPDConfig 75 qosConfig *generic.QoSConfiguration 76 77 podUpdater control.PodUpdater 78 spdControl control.ServiceProfileControl 79 workloadControl control.UnstructuredControl 80 cncControl control.CNCControl 81 82 spdIndexer cache.Indexer 83 podIndexer cache.Indexer 84 85 podLister corelisters.PodLister 86 spdLister apiListers.ServiceProfileDescriptorLister 87 workloadGVKLister map[schema.GroupVersionKind]cache.GenericLister 88 workloadLister map[schema.GroupVersionResource]cache.GenericLister 89 spdWorkloadInformer map[schema.GroupVersionResource]native.DynamicInformer 90 91 syncedFunc []cache.InformerSynced 92 spdQueue workqueue.RateLimitingInterface 93 workloadSyncQueue workqueue.RateLimitingInterface 94 95 metricsEmitter metrics.MetricEmitter 96 97 cncCacheController *cncCacheController 98 99 indicatorManager *indicator_plugin.IndicatorManager 100 indicatorPlugins map[string]indicator_plugin.IndicatorPlugin 101 indicatorsSpecBusiness map[apiworkload.ServiceBusinessIndicatorName]interface{} 102 indicatorsSpecExtended map[string]interface{} 103 indicatorsSpecSystem map[apiworkload.ServiceSystemIndicatorName]interface{} 104 indicatorsStatusBusiness map[apiworkload.ServiceBusinessIndicatorName]interface{} 105 } 106 107 func NewSPDController(ctx context.Context, controlCtx *katalystbase.GenericContext, 108 genericConf *generic.GenericConfiguration, _ *controller.GenericControllerConfiguration, 109 conf *controller.SPDConfig, qosConfig *generic.QoSConfiguration, extraConf interface{}, 110 ) (*SPDController, error) { 111 if conf == nil || controlCtx.Client == nil || genericConf == nil { 112 return nil, fmt.Errorf("client, conf and generalConf can't be nil") 113 } 114 115 podInformer := controlCtx.KubeInformerFactory.Core().V1().Pods() 116 spdInformer := controlCtx.InternalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors() 117 cncInformer := controlCtx.InternalInformerFactory.Config().V1alpha1().CustomNodeConfigs() 118 119 spdController := &SPDController{ 120 ctx: ctx, 121 conf: conf, 122 qosConfig: qosConfig, 123 podUpdater: &control.DummyPodUpdater{}, 124 spdControl: &control.DummySPDControl{}, 125 workloadControl: &control.DummyUnstructuredControl{}, 126 spdQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "spd"), 127 workloadSyncQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "workload"), 128 metricsEmitter: controlCtx.EmitterPool.GetDefaultMetricsEmitter().WithTags(spdControllerName), 129 workloadGVKLister: make(map[schema.GroupVersionKind]cache.GenericLister), 130 workloadLister: make(map[schema.GroupVersionResource]cache.GenericLister), 131 spdWorkloadInformer: make(map[schema.GroupVersionResource]native.DynamicInformer), 132 } 133 134 spdController.podLister = podInformer.Lister() 135 spdController.syncedFunc = append(spdController.syncedFunc, podInformer.Informer().HasSynced) 136 137 spdController.spdLister = spdInformer.Lister() 138 spdController.syncedFunc = append(spdController.syncedFunc, spdInformer.Informer().HasSynced) 139 140 workloadInformers := controlCtx.DynamicResourcesManager.GetDynamicInformers() 141 for _, wf := range workloadInformers { 142 spdController.workloadGVKLister[wf.GVK] = wf.Informer.Lister() 143 spdController.workloadLister[wf.GVR] = wf.Informer.Lister() 144 spdController.syncedFunc = append(spdController.syncedFunc, wf.Informer.Informer().HasSynced) 145 } 146 147 for _, workload := range conf.SPDWorkloadGVResources { 148 wf, ok := workloadInformers[workload] 149 if !ok { 150 klog.Errorf("spd concerned workload %s not found in dynamic GVR resources", workload) 151 continue 152 } 153 154 spdController.spdWorkloadInformer[wf.GVR] = wf 155 wf.Informer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 156 AddFunc: spdController.addWorkload(workload), 157 UpdateFunc: spdController.updateWorkload(workload), 158 }) 159 } 160 161 spdInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{ 162 AddFunc: spdController.addSPD, 163 UpdateFunc: spdController.updateSPD, 164 }, conf.ReSyncPeriod) 165 166 // build index: workload ---> spd 167 spdController.spdIndexer = spdInformer.Informer().GetIndexer() 168 if _, exist := spdController.spdIndexer.GetIndexers()[consts.TargetReferenceIndex]; !exist { 169 err := spdController.spdIndexer.AddIndexers(cache.Indexers{ 170 consts.TargetReferenceIndex: util.SPDTargetReferenceIndex, 171 }) 172 if err != nil { 173 klog.Errorf("[spd] failed to add target reference index for spd: %v", err) 174 return nil, err 175 } 176 } 177 178 // build index: workload ---> pod 179 spdController.podIndexer = podInformer.Informer().GetIndexer() 180 for _, key := range conf.SPDPodLabelIndexerKeys { 181 indexer := native.PodLabelIndexer(key) 182 if _, ok := spdController.podIndexer.GetIndexers()[key]; !ok { 183 err := spdController.podIndexer.AddIndexers(cache.Indexers{ 184 key: indexer.IndexFunc, 185 }) 186 if err != nil { 187 klog.Errorf("[spd] failed to add label index for pod: %v", err) 188 return nil, err 189 } 190 } 191 } 192 193 // spd controller need watch pod create and delete to update its spd baseline percentile key 194 podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 195 AddFunc: spdController.addPod, 196 UpdateFunc: spdController.updatePod, 197 DeleteFunc: spdController.deletePod, 198 }) 199 200 if !genericConf.DryRun { 201 spdController.podUpdater = control.NewRealPodUpdater(controlCtx.Client.KubeClient) 202 spdController.spdControl = control.NewSPDControlImp(controlCtx.Client.InternalClient) 203 spdController.workloadControl = control.NewRealUnstructuredControl(controlCtx.Client.DynamicClient) 204 spdController.cncControl = control.NewRealCNCControl(controlCtx.Client.InternalClient) 205 } 206 207 var err error 208 spdController.cncCacheController, err = newCNCCacheController(ctx, podInformer, 209 cncInformer, spdInformer, spdController.workloadGVKLister, spdController.workloadLister, 210 spdController.cncControl, spdController.metricsEmitter, conf) 211 if err != nil { 212 return nil, err 213 } 214 215 if err := spdController.initializeIndicatorPlugins(controlCtx, extraConf); err != nil { 216 return nil, err 217 } 218 219 native.SetPodTransformer(podTransformerFunc) 220 return spdController, nil 221 } 222 223 func (sc *SPDController) Run() { 224 defer utilruntime.HandleCrash() 225 defer sc.workloadSyncQueue.ShutDown() 226 defer sc.spdQueue.ShutDown() 227 defer klog.Infof("shutting down %s controller", spdControllerName) 228 229 if !cache.WaitForCacheSync(sc.ctx.Done(), sc.syncedFunc...) { 230 utilruntime.HandleError(fmt.Errorf("unable to sync caches for %s controller", spdControllerName)) 231 return 232 } 233 klog.Infof("caches are synced for %s controller", spdControllerName) 234 235 for i := 0; i < workloadWorkerCount; i++ { 236 go wait.Until(sc.workloadWorker, time.Second, sc.ctx.Done()) 237 } 238 for i := 0; i < spdWorkerCount; i++ { 239 go wait.Until(sc.spdWorker, time.Second, sc.ctx.Done()) 240 } 241 go wait.Until(sc.cleanSPD, time.Minute*5, sc.ctx.Done()) 242 243 go sc.cncCacheController.Run() 244 for _, plugin := range sc.indicatorPlugins { 245 go plugin.Run() 246 } 247 for i := 0; i < indicatorSpecWorkerCount; i++ { 248 go wait.Until(sc.syncIndicatorSpec, time.Second, sc.ctx.Done()) 249 } 250 for i := 0; i < indicatorStatusWorkerCount; i++ { 251 go wait.Until(sc.syncIndicatorStatus, time.Second, sc.ctx.Done()) 252 } 253 254 <-sc.ctx.Done() 255 } 256 257 func (sc *SPDController) GetIndicatorPlugins() (plugins []indicator_plugin.IndicatorPlugin) { 258 for _, p := range sc.indicatorPlugins { 259 plugins = append(plugins, p) 260 } 261 return plugins 262 } 263 264 func (sc *SPDController) initializeIndicatorPlugins(controlCtx *katalystbase.GenericContext, extraConf interface{}) error { 265 sc.indicatorManager = indicator_plugin.NewIndicatorManager() 266 sc.indicatorPlugins = make(map[string]indicator_plugin.IndicatorPlugin) 267 sc.indicatorsSpecBusiness = make(map[apiworkload.ServiceBusinessIndicatorName]interface{}) 268 sc.indicatorsSpecSystem = make(map[apiworkload.ServiceSystemIndicatorName]interface{}) 269 sc.indicatorsSpecExtended = make(map[string]interface{}) 270 sc.indicatorsStatusBusiness = make(map[apiworkload.ServiceBusinessIndicatorName]interface{}) 271 272 initializers := indicator_plugin.GetPluginInitializers() 273 for _, pluginName := range sc.conf.IndicatorPlugins { 274 if initFunc, ok := initializers[pluginName]; ok { 275 plugin, err := initFunc(sc.ctx, sc.conf, extraConf, sc.spdWorkloadInformer, 276 controlCtx, sc.indicatorManager) 277 if err != nil { 278 return err 279 } 280 281 general.InfoS("indicator initialized", "plugin", pluginName) 282 sc.indicatorPlugins[pluginName] = plugin 283 for _, name := range plugin.GetSupportedBusinessIndicatorSpec() { 284 sc.indicatorsSpecBusiness[name] = struct{}{} 285 } 286 for _, name := range plugin.GetSupportedSystemIndicatorSpec() { 287 sc.indicatorsSpecSystem[name] = struct{}{} 288 } 289 for _, name := range plugin.GetSupportedExtendedIndicatorSpec() { 290 sc.indicatorsSpecExtended[name] = struct{}{} 291 } 292 for _, name := range plugin.GetSupportedBusinessIndicatorStatus() { 293 sc.indicatorsStatusBusiness[name] = struct{}{} 294 } 295 } 296 } 297 298 return nil 299 } 300 301 func (sc *SPDController) addWorkload(workloadGVR string) func(obj interface{}) { 302 return func(obj interface{}) { 303 workload, ok := obj.(metav1.Object) 304 if !ok { 305 klog.Errorf("[spd] cannot convert obj to metav1.Object") 306 return 307 } 308 sc.enqueueWorkload(workloadGVR, workload) 309 } 310 } 311 312 func (sc *SPDController) updateWorkload(workloadGVR string) func(oldObj, newObj interface{}) { 313 return func(_, cur interface{}) { 314 workload, ok := cur.(metav1.Object) 315 if !ok { 316 klog.Errorf("[spd] cannot convert cur obj to metav1.Object") 317 return 318 } 319 sc.enqueueWorkload(workloadGVR, workload) 320 } 321 } 322 323 func (sc *SPDController) enqueueWorkload(workloadGVR string, workload metav1.Object) { 324 if workload == nil { 325 klog.Warning("[spd] trying to enqueue a nil spd") 326 return 327 } 328 329 key, err := native.GenerateUniqGVRNameKey(workloadGVR, workload) 330 if err != nil { 331 utilruntime.HandleError(err) 332 return 333 } 334 335 sc.workloadSyncQueue.Add(key) 336 } 337 338 func (sc *SPDController) workloadWorker() { 339 for sc.processNextWorkload() { 340 } 341 } 342 343 func (sc *SPDController) processNextWorkload() bool { 344 key, quit := sc.workloadSyncQueue.Get() 345 if quit { 346 return false 347 } 348 defer sc.workloadSyncQueue.Done(key) 349 350 err := sc.syncWorkload(key.(string)) 351 if err == nil { 352 sc.workloadSyncQueue.Forget(key) 353 return true 354 } 355 356 utilruntime.HandleError(fmt.Errorf("sync %q failed with %v", key, err)) 357 sc.workloadSyncQueue.AddRateLimited(key) 358 359 return true 360 } 361 362 // syncWorkload is mainly responsible to maintain the lifecycle of spd for each 363 // workload, without handling the service profile calculation logic. 364 func (sc *SPDController) syncWorkload(key string) error { 365 klog.V(5).Infof("[spd] syncing workload [%v]", key) 366 workloadGVR, namespace, name, err := native.ParseUniqGVRNameKey(key) 367 if err != nil { 368 klog.Errorf("[spd] failed to parse key %s to workload", key) 369 return err 370 } 371 372 gvr, _ := schema.ParseResourceArg(workloadGVR) 373 if gvr == nil { 374 err = fmt.Errorf("[spd] ParseResourceArg worload %v failed", workloadGVR) 375 klog.Error(err) 376 return err 377 } 378 379 workload, err := sc.getWorkload(*gvr, namespace, name) 380 if err != nil { 381 klog.Errorf("[spd] failed to get workload %s/%s", namespace, name) 382 if errors.IsNotFound(err) { 383 return nil 384 } 385 return err 386 } 387 388 podList, err := native.GetPodListForWorkload(workload, sc.podIndexer, sc.conf.SPDPodLabelIndexerKeys, sc.podLister) 389 if err != nil { 390 klog.Errorf("[spd] get pod list for workload %s/%s failed: %v", namespace, name, err) 391 return err 392 } 393 394 if !util.WorkloadSPDEnabled(workload) { 395 if err := sc.cleanPodListSPDAnnotation(podList); err != nil { 396 klog.Errorf("[spd] clear pod list annotations for workload %s/%s failed: %v", namespace, name, err) 397 return err 398 } 399 return nil 400 } 401 402 spd, err := sc.getOrCreateSPDForWorkload(workload) 403 if err != nil { 404 klog.Errorf("[spd] get or create spd for workload %s/%s failed: %v", namespace, name, err) 405 return err 406 } 407 408 if err := sc.setPodListSPDAnnotation(podList, spd.Name); err != nil { 409 klog.Errorf("[spd] set pod list annotations for workload %s/%s failed: %v", namespace, name, err) 410 return err 411 } 412 return nil 413 } 414 415 func (sc *SPDController) addSPD(obj interface{}) { 416 spd, ok := obj.(*apiworkload.ServiceProfileDescriptor) 417 if !ok { 418 klog.Errorf("[spd] cannot convert obj to *apiworkload.ServiceProfileDescriptor") 419 return 420 } 421 sc.enqueueSPD(spd) 422 } 423 424 func (sc *SPDController) updateSPD(_, newObj interface{}) { 425 spd, ok := newObj.(*apiworkload.ServiceProfileDescriptor) 426 if !ok { 427 klog.Errorf("[spd] cannot convert obj to *apiworkload.ServiceProfileDescriptor") 428 return 429 } 430 sc.enqueueSPD(spd) 431 } 432 433 func (sc *SPDController) enqueueSPD(spd *apiworkload.ServiceProfileDescriptor) { 434 if spd == nil { 435 klog.Warning("[spd] trying to enqueue a nil spd") 436 return 437 } 438 439 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(spd) 440 if err != nil { 441 utilruntime.HandleError(fmt.Errorf("[spd] couldn't get key for workload %#v: %v", spd, err)) 442 return 443 } 444 445 sc.spdQueue.Add(key) 446 } 447 448 func (sc *SPDController) spdWorker() { 449 for sc.processNextSPD() { 450 } 451 } 452 453 func (sc *SPDController) processNextSPD() bool { 454 key, quit := sc.spdQueue.Get() 455 if quit { 456 return false 457 } 458 defer sc.spdQueue.Done(key) 459 460 err := sc.syncSPD(key.(string)) 461 if err == nil { 462 sc.spdQueue.Forget(key) 463 return true 464 } 465 466 utilruntime.HandleError(fmt.Errorf("sync %q failed with %v", key, err)) 467 sc.spdQueue.AddRateLimited(key) 468 469 return true 470 } 471 472 // syncSPD is mainly responsible to handle the service profile calculation logic for each 473 // spd existed, and it will always assume that all spd is valid. 474 func (sc *SPDController) syncSPD(key string) error { 475 klog.V(5).Infof("[spd] syncing spd [%v]", key) 476 namespace, name, err := cache.SplitMetaNamespaceKey(key) 477 if err != nil { 478 klog.Errorf("[spd] failed to split namespace and name from spd key %s", key) 479 return err 480 } 481 482 spd, err := sc.spdLister.ServiceProfileDescriptors(namespace).Get(name) 483 if err != nil { 484 klog.Errorf("[spd] failed to get spd [%v]", key) 485 if errors.IsNotFound(err) { 486 return nil 487 } 488 return err 489 } 490 491 // update baseline percentile 492 newSPD := spd.DeepCopy() 493 err = sc.updateSPDAnnotations(newSPD) 494 if err != nil { 495 return err 496 } 497 498 _, err = sc.spdControl.PatchSPD(sc.ctx, spd, newSPD) 499 if err != nil { 500 return err 501 } 502 503 return nil 504 } 505 506 // cleanSPD is mainly responsible to clean all spd CR that should not exist if its workload 507 // is deleted or no longer enabled with service profiling logic. 508 func (sc *SPDController) cleanSPD() { 509 spdList, err := sc.spdLister.List(labels.Everything()) 510 if err != nil { 511 klog.Errorf("[spd] failed to list all spd: %v", err) 512 } 513 514 for _, spd := range spdList { 515 gvr, _ := meta.UnsafeGuessKindToResource(schema.FromAPIVersionAndKind(spd.Spec.TargetRef.APIVersion, spd.Spec.TargetRef.Kind)) 516 workloadLister, ok := sc.workloadLister[gvr] 517 if !ok { 518 klog.Errorf("[spd] spd %s without workload lister", spd.Name) 519 continue 520 } 521 522 needDelete := false 523 workloadObj, err := util.GetWorkloadForSPD(spd, workloadLister) 524 if err != nil { 525 if errors.IsNotFound(err) { 526 needDelete = true 527 } else { 528 klog.Errorf("[spd] get workload for spd %s error: %v", spd.Name, err) 529 } 530 } else { 531 workload := workloadObj.(*unstructured.Unstructured) 532 if !util.WorkloadSPDEnabled(workload) { 533 needDelete = true 534 535 klog.Warningf("[spd] clear un-wanted spd annotation %v for workload %v", spd.Name, workload.GetName()) 536 } 537 } 538 539 if needDelete { 540 klog.V(5).Infof("[spd] delete un-wanted spd %v", spd.Name) 541 if err := sc.spdControl.DeleteSPD(sc.ctx, spd, metav1.DeleteOptions{}); err != nil { 542 klog.Warningf("[spd] delete un-wanted spd %v err: %v", spd.Name, err) 543 } 544 } 545 } 546 } 547 548 // getWorkload is used to get workload info from dynamic lister according to the given GVR 549 func (sc *SPDController) getWorkload(gvr schema.GroupVersionResource, namespace, name string) (*unstructured.Unstructured, error) { 550 if _, ok := sc.workloadLister[gvr]; !ok { 551 return nil, fmt.Errorf("can't find gvr %s from listers", gvr.String()) 552 } 553 554 workloadLister := sc.workloadLister[gvr] 555 workloadObj, err := workloadLister.ByNamespace(namespace).Get(name) 556 if err != nil { 557 return nil, err 558 } 559 560 workload, ok := workloadObj.(*unstructured.Unstructured) 561 if !ok { 562 return nil, fmt.Errorf("failed to convert workload to *unstructured.Unstructured") 563 } 564 565 return workload, nil 566 } 567 568 // defaultBaselinePercent returns default baseline ratio based on the qos level of workload, 569 // and if the configured data cannot be found, we will return 100, 570 // which signifies that the resources of this workload cannot be reclaimed to reclaimed_cores. 571 func (sc *SPDController) defaultBaselinePercent(workload *unstructured.Unstructured) *int32 { 572 podTemplateSpec, err := native.GetUnstructuredPodTemplateSpec(workload) 573 if err != nil { 574 general.ErrorS(err, "failed to GetUnstructuredPodTemplate") 575 return pointer.Int32(100) 576 } 577 578 pod := &core.Pod{ 579 ObjectMeta: podTemplateSpec.ObjectMeta, 580 Spec: podTemplateSpec.Spec, 581 } 582 583 qosLevel, err := sc.qosConfig.GetQoSLevel(pod, podTemplateSpec.Annotations) 584 if err != nil { 585 general.ErrorS(err, "failed to GetQoSLevel") 586 return pointer.Int32(100) 587 } 588 baselinePercent, ok := sc.conf.BaselinePercent[qosLevel] 589 if !ok { 590 general.InfoS("failed to get default baseline percent", "qosLevel", qosLevel) 591 return pointer.Int32(100) 592 } 593 return pointer.Int32(int32(baselinePercent)) 594 } 595 596 // getOrCreateSPDForWorkload get workload's spd or create one if the spd doesn't exist 597 func (sc *SPDController) getOrCreateSPDForWorkload(workload *unstructured.Unstructured) (*apiworkload.ServiceProfileDescriptor, error) { 598 gvk := workload.GroupVersionKind() 599 ownerRef := metav1.OwnerReference{ 600 Name: workload.GetName(), 601 Kind: gvk.Kind, 602 APIVersion: gvk.GroupVersion().String(), 603 UID: workload.GetUID(), 604 } 605 606 spd, err := util.GetSPDForWorkload(workload, sc.spdIndexer, sc.spdLister) 607 if err != nil { 608 if errors.IsNotFound(err) { 609 spd := &apiworkload.ServiceProfileDescriptor{ 610 ObjectMeta: metav1.ObjectMeta{ 611 Name: workload.GetName(), 612 Namespace: workload.GetNamespace(), 613 OwnerReferences: []metav1.OwnerReference{ownerRef}, 614 Labels: workload.GetLabels(), 615 }, 616 Spec: apiworkload.ServiceProfileDescriptorSpec{ 617 TargetRef: v1alpha1.CrossVersionObjectReference{ 618 Name: ownerRef.Name, 619 Kind: ownerRef.Kind, 620 APIVersion: ownerRef.APIVersion, 621 }, 622 BaselinePercent: sc.defaultBaselinePercent(workload), 623 }, 624 Status: apiworkload.ServiceProfileDescriptorStatus{ 625 AggMetrics: []apiworkload.AggPodMetrics{}, 626 }, 627 } 628 629 err := sc.updateSPDAnnotations(spd) 630 if err != nil { 631 return nil, err 632 } 633 634 return sc.spdControl.CreateSPD(sc.ctx, spd, metav1.CreateOptions{}) 635 } 636 637 return nil, err 638 } 639 640 return spd, nil 641 } 642 643 func (sc *SPDController) setPodListSPDAnnotation(podList []*core.Pod, spdName string) error { 644 var mtx sync.Mutex 645 var errList []error 646 setPodAnnotations := func(i int) { 647 err := sc.setPodSPDAnnotation(podList[i], spdName) 648 if err != nil { 649 mtx.Lock() 650 errList = append(errList, err) 651 mtx.Unlock() 652 return 653 } 654 } 655 workqueue.ParallelizeUntil(sc.ctx, 16, len(podList), setPodAnnotations) 656 if len(errList) > 0 { 657 err := utilerrors.NewAggregate(errList) 658 klog.Errorf(err.Error()) 659 return err 660 } 661 662 return nil 663 } 664 665 // setPodSPDAnnotation add spd name in pod annotations 666 func (sc *SPDController) setPodSPDAnnotation(pod *core.Pod, spdName string) error { 667 if pod.GetAnnotations()[apiconsts.PodAnnotationSPDNameKey] == spdName { 668 return nil 669 } 670 671 podCopy := pod.DeepCopy() 672 annotations := podCopy.GetAnnotations() 673 if annotations == nil { 674 annotations = make(map[string]string) 675 } 676 annotations[apiconsts.PodAnnotationSPDNameKey] = spdName 677 podCopy.SetAnnotations(annotations) 678 679 err := sc.podUpdater.PatchPod(sc.ctx, pod, podCopy) 680 if err != nil { 681 return err 682 } 683 684 klog.Infof("[spd] successfully set annotations for pod %v to %v", pod.GetName(), spdName) 685 return nil 686 } 687 688 func (sc *SPDController) cleanPodListSPDAnnotation(podList []*core.Pod) error { 689 var mtx sync.Mutex 690 var errList []error 691 setPodAnnotations := func(i int) { 692 err := sc.cleanPodSPDAnnotation(podList[i]) 693 if err != nil { 694 mtx.Lock() 695 errList = append(errList, err) 696 mtx.Unlock() 697 return 698 } 699 } 700 workqueue.ParallelizeUntil(sc.ctx, 16, len(podList), setPodAnnotations) 701 if len(errList) > 0 { 702 err := utilerrors.NewAggregate(errList) 703 klog.Errorf(err.Error()) 704 return err 705 } 706 707 return nil 708 } 709 710 // cleanPodSPDAnnotation removes pod name in workload annotations 711 func (sc *SPDController) cleanPodSPDAnnotation(pod *core.Pod) error { 712 if _, ok := pod.GetAnnotations()[apiconsts.PodAnnotationSPDNameKey]; !ok { 713 return nil 714 } 715 716 podCopy := pod.DeepCopy() 717 annotations := podCopy.GetAnnotations() 718 delete(annotations, apiconsts.PodAnnotationSPDNameKey) 719 podCopy.SetAnnotations(annotations) 720 721 err := sc.podUpdater.PatchPod(sc.ctx, pod, podCopy) 722 if err != nil { 723 return err 724 } 725 726 klog.Infof("[spd] successfully clear annotations for pod %v", pod.GetName()) 727 return nil 728 } 729 730 func (sc *SPDController) addPod(obj interface{}) { 731 pod, ok := obj.(*core.Pod) 732 if !ok { 733 klog.Errorf("[spd] cannot convert obj to *core.Pod") 734 return 735 } 736 sc.enqueuePod(pod) 737 } 738 739 func (sc *SPDController) deletePod(obj interface{}) { 740 pod, ok := obj.(*core.Pod) 741 if !ok { 742 klog.Errorf("[spd] cannot convert obj to *core.Pod") 743 return 744 } 745 sc.enqueuePod(pod) 746 } 747 748 func (sc *SPDController) updatePod(_ interface{}, newObj interface{}) { 749 pod, ok := newObj.(*core.Pod) 750 if !ok { 751 klog.Errorf("[spd] cannot convert obj to *core.Pod") 752 return 753 } 754 sc.enqueuePod(pod) 755 } 756 757 func (sc *SPDController) enqueuePod(pod *core.Pod) { 758 name, err := util.GetPodSPDName(pod.ObjectMeta) 759 if err != nil { 760 return 761 } 762 763 spd, err := sc.spdLister.ServiceProfileDescriptors(pod.Namespace).Get(name) 764 if err != nil { 765 return 766 } 767 sc.enqueueSPD(spd) 768 } 769 770 func (sc *SPDController) updateSPDAnnotations(spd *apiworkload.ServiceProfileDescriptor) error { 771 err := sc.updateBaselineSentinel(spd) 772 if err != nil { 773 return err 774 } 775 776 err = sc.updateHash(spd) 777 if err != nil { 778 return err 779 } 780 781 return nil 782 } 783 784 func (sc *SPDController) updateHash(spd *apiworkload.ServiceProfileDescriptor) error { 785 hash, err := util.CalculateSPDHash(spd) 786 if err != nil { 787 return err 788 } 789 790 util.SetSPDHash(spd, hash) 791 return nil 792 } 793 794 func podTransformerFunc(src, dest *core.Pod) { 795 dest.Spec.NodeName = src.Spec.NodeName 796 dest.Status.Phase = src.Status.Phase 797 containerStatusesTransformerFunc(&src.Status.ContainerStatuses, &dest.Status.ContainerStatuses) 798 } 799 800 func containerStatusesTransformerFunc(src, dst *[]core.ContainerStatus) { 801 if src == nil || len(*src) == 0 { 802 return 803 } 804 805 if len(*dst) == 0 { 806 *dst = make([]core.ContainerStatus, len(*src)) 807 } 808 809 for i, c := range *src { 810 (*dst)[i].State = c.State 811 } 812 }