github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/vpa/recommend.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package vpa 18 19 import ( 20 "context" 21 "fmt" 22 "time" 23 24 "k8s.io/apimachinery/pkg/api/errors" 25 apierrors "k8s.io/apimachinery/pkg/api/errors" 26 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 "k8s.io/apimachinery/pkg/labels" 28 "k8s.io/apimachinery/pkg/runtime/schema" 29 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 30 "k8s.io/apimachinery/pkg/util/wait" 31 coreListers "k8s.io/client-go/listers/core/v1" 32 "k8s.io/client-go/tools/cache" 33 "k8s.io/client-go/util/workqueue" 34 "k8s.io/klog/v2" 35 36 apis "github.com/kubewharf/katalyst-api/pkg/apis/autoscaling/v1alpha1" 37 autoscalelister "github.com/kubewharf/katalyst-api/pkg/client/listers/autoscaling/v1alpha1" 38 workloadlister "github.com/kubewharf/katalyst-api/pkg/client/listers/workload/v1alpha1" 39 katalystbase "github.com/kubewharf/katalyst-core/cmd/base" 40 "github.com/kubewharf/katalyst-core/pkg/client/control" 41 "github.com/kubewharf/katalyst-core/pkg/config/controller" 42 "github.com/kubewharf/katalyst-core/pkg/config/generic" 43 "github.com/kubewharf/katalyst-core/pkg/consts" 44 "github.com/kubewharf/katalyst-core/pkg/controller/vpa/algorithm" 45 "github.com/kubewharf/katalyst-core/pkg/controller/vpa/algorithm/recommenders" 46 "github.com/kubewharf/katalyst-core/pkg/metrics" 47 katalystutil "github.com/kubewharf/katalyst-core/pkg/util" 48 "github.com/kubewharf/katalyst-core/pkg/util/general" 49 "github.com/kubewharf/katalyst-core/pkg/util/native" 50 ) 51 52 const resourceRecommendControllerName = "resourceRecommend" 53 54 const metricNameRecommendControlVPASyncCosts = "res_rec_vpa_sync_costs" 55 56 // rs stores all the in-tree recommendation algorithm implementations 57 var rs = []algorithm.ResourceRecommender{ 58 recommenders.NewCPURecommender(), 59 } 60 61 func init() { 62 for _, r := range rs { 63 algorithm.RegisterRecommender(r) 64 } 65 } 66 67 // ResourceRecommendController is responsible to use in-tree algorithm implementations 68 // to export those recommended results to vpa-rec according to vpa config. 69 // 70 // although we use informer index mechanism to speed up the looking 71 // efficiency, we can't assume that all function callers MUST use an 72 // indexed informer to look up objects. 73 type ResourceRecommendController struct { 74 ctx context.Context 75 conf *controller.VPAConfig 76 77 vpaUpdater control.VPAUpdater 78 vpaRecUpdater control.VPARecommendationUpdater 79 80 spdIndexer cache.Indexer 81 vpaRecIndexer cache.Indexer 82 podIndexer cache.Indexer 83 84 podLister coreListers.PodLister 85 spdLister workloadlister.ServiceProfileDescriptorLister 86 vpaLister autoscalelister.KatalystVerticalPodAutoscalerLister 87 vpaRecLister autoscalelister.VerticalPodAutoscalerRecommendationLister 88 workloadLister map[schema.GroupVersionKind]cache.GenericLister 89 90 syncedFunc []cache.InformerSynced 91 vpaQueue workqueue.RateLimitingInterface 92 93 metricsEmitter metrics.MetricEmitter 94 95 vpaSyncWorkers int 96 } 97 98 func NewResourceRecommendController(ctx context.Context, controlCtx *katalystbase.GenericContext, 99 genericConf *generic.GenericConfiguration, _ *controller.GenericControllerConfiguration, 100 config *controller.VPAConfig, 101 ) (*ResourceRecommendController, error) { 102 if controlCtx == nil { 103 return nil, fmt.Errorf("controlCtx is invalid") 104 } 105 106 podInformer := controlCtx.KubeInformerFactory.Core().V1().Pods() 107 spdInformer := controlCtx.InternalInformerFactory.Workload().V1alpha1().ServiceProfileDescriptors() 108 vpaInformer := controlCtx.InternalInformerFactory.Autoscaling().V1alpha1().KatalystVerticalPodAutoscalers() 109 vpaRecInformer := controlCtx.InternalInformerFactory.Autoscaling().V1alpha1().VerticalPodAutoscalerRecommendations() 110 111 genericClient := controlCtx.Client 112 recController := &ResourceRecommendController{ 113 ctx: ctx, 114 conf: config, 115 vpaUpdater: &control.DummyVPAUpdater{}, 116 vpaRecUpdater: &control.DummyVPARecommendationUpdater{}, 117 spdIndexer: spdInformer.Informer().GetIndexer(), 118 vpaRecIndexer: vpaRecInformer.Informer().GetIndexer(), 119 podIndexer: podInformer.Informer().GetIndexer(), 120 podLister: podInformer.Lister(), 121 spdLister: spdInformer.Lister(), 122 vpaLister: vpaInformer.Lister(), 123 vpaRecLister: vpaRecInformer.Lister(), 124 workloadLister: make(map[schema.GroupVersionKind]cache.GenericLister), 125 vpaQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "vpa"), 126 syncedFunc: []cache.InformerSynced{ 127 podInformer.Informer().HasSynced, 128 spdInformer.Informer().HasSynced, 129 vpaInformer.Informer().HasSynced, 130 vpaRecInformer.Informer().HasSynced, 131 }, 132 vpaSyncWorkers: config.VPASyncWorkers, 133 } 134 135 for _, wf := range controlCtx.DynamicResourcesManager.GetDynamicInformers() { 136 recController.workloadLister[wf.GVK] = wf.Informer.Lister() 137 recController.syncedFunc = append(recController.syncedFunc, wf.Informer.Informer().HasSynced) 138 } 139 140 klog.Infof("vpa resync period %v", config.VPAReSyncPeriod) 141 142 vpaInformer.Informer().AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{ 143 AddFunc: recController.addVPA, 144 UpdateFunc: recController.updateVPA, 145 }, config.VPAReSyncPeriod) 146 147 // build index: workload ---> spd 148 if _, ok := spdInformer.Informer().GetIndexer().GetIndexers()[consts.TargetReferenceIndex]; !ok { 149 err := spdInformer.Informer().GetIndexer().AddIndexers(cache.Indexers{ 150 consts.TargetReferenceIndex: katalystutil.SPDTargetReferenceIndex, 151 }) 152 if err != nil { 153 klog.Errorf("failed to add spd target reference index: %v", err) 154 return nil, err 155 } 156 } 157 158 // build index: vpa ---> vpaRec 159 if _, ok := vpaRecInformer.Informer().GetIndexer().GetIndexers()[consts.OwnerReferenceIndex]; !ok { 160 err := vpaRecInformer.Informer().GetIndexer().AddIndexers(cache.Indexers{ 161 consts.OwnerReferenceIndex: native.ObjectOwnerReferenceIndex, 162 }) 163 if err != nil { 164 klog.Errorf("[vpa-rec] failed to add owner vpa index: %v", err) 165 return nil, err 166 } 167 } 168 169 // build index: workload ---> pod 170 for _, key := range config.VPAPodLabelIndexerKeys { 171 indexer := native.PodLabelIndexer(key) 172 if _, ok := recController.podIndexer.GetIndexers()[key]; !ok { 173 err := recController.podIndexer.AddIndexers(cache.Indexers{ 174 key: indexer.IndexFunc, 175 }) 176 if err != nil { 177 klog.Errorf("[vpa-rec] failed to add label index for pod: %v", err) 178 return nil, err 179 } 180 } 181 } 182 183 recController.metricsEmitter = controlCtx.EmitterPool.GetDefaultMetricsEmitter() 184 if recController.metricsEmitter == nil { 185 recController.metricsEmitter = metrics.DummyMetrics{} 186 } 187 188 if !genericConf.DryRun { 189 recController.vpaUpdater = control.NewRealVPAUpdater(genericClient.InternalClient) 190 recController.vpaRecUpdater = control.NewRealVPARecommendationUpdater(genericClient.InternalClient) 191 } 192 193 return recController, nil 194 } 195 196 func (rrc *ResourceRecommendController) Run() { 197 defer utilruntime.HandleCrash() 198 defer rrc.vpaQueue.ShutDown() 199 200 defer klog.Infof("[resource-rec] shutting down %s controller", resourceRecommendControllerName) 201 202 if !cache.WaitForCacheSync(rrc.ctx.Done(), rrc.syncedFunc...) { 203 utilruntime.HandleError(fmt.Errorf("unable to sync caches for %s controller", resourceRecommendControllerName)) 204 return 205 } 206 klog.Infof("[resource-rec] caches are synced for %s controller", resourceRecommendControllerName) 207 klog.Infof("[resource-rec] start %d workers for %s controller", rrc.vpaSyncWorkers, resourceRecommendControllerName) 208 209 for i := 0; i < rrc.vpaSyncWorkers; i++ { 210 go wait.Until(rrc.vpaWorker, time.Second, rrc.ctx.Done()) 211 } 212 213 <-rrc.ctx.Done() 214 } 215 216 func (rrc *ResourceRecommendController) addVPA(obj interface{}) { 217 v, ok := obj.(*apis.KatalystVerticalPodAutoscaler) 218 if !ok { 219 klog.Errorf("[resource-rec] cannot convert obj to *apis.VerticalPodAutoscaler: %v", obj) 220 return 221 } 222 223 klog.V(4).Infof("[resource-rec] notice addition of vpa %s", v.Name) 224 rrc.enqueueVPA(v) 225 } 226 227 func (rrc *ResourceRecommendController) updateVPA(_, cur interface{}) { 228 v, ok := cur.(*apis.KatalystVerticalPodAutoscaler) 229 if !ok { 230 klog.Errorf("[resource-rec] cannot convert curObj to *apis.VerticalPodAutoscaler: %v", cur) 231 return 232 } 233 234 klog.V(4).Infof("[resource-rec] notice update of vpa %s", v.Name) 235 rrc.enqueueVPA(v) 236 } 237 238 func (rrc *ResourceRecommendController) enqueueVPA(vpa *apis.KatalystVerticalPodAutoscaler) { 239 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(vpa) 240 if err != nil { 241 utilruntime.HandleError(fmt.Errorf("couldn't get key for object %#v: %v", vpa, err)) 242 return 243 } 244 rrc.vpaQueue.Add(key) 245 } 246 247 func (rrc *ResourceRecommendController) vpaWorker() { 248 for rrc.processNextVPA() { 249 } 250 } 251 252 func (rrc *ResourceRecommendController) processNextVPA() bool { 253 key, quit := rrc.vpaQueue.Get() 254 if quit { 255 return false 256 } 257 defer rrc.vpaQueue.Done(key) 258 259 err := rrc.syncVPA(key.(string)) 260 if err == nil { 261 rrc.vpaQueue.Forget(key) 262 return true 263 } 264 265 utilruntime.HandleError(fmt.Errorf("sync %q failed with %v", key, err)) 266 rrc.vpaQueue.AddRateLimited(key) 267 268 return true 269 } 270 271 // syncVPA is mainly responsible to calculate resource recommendation for each vpa (with 272 // recommender setting as in-tree algorithms); since we will re-sync periodicallly, we 273 // won't return error in this function. 274 func (rrc *ResourceRecommendController) syncVPA(key string) error { 275 namespace, name, err := cache.SplitMetaNamespaceKey(key) 276 if err != nil { 277 klog.Errorf("[resource-rec] failed to split namespace and name from key %s", key) 278 return err 279 } 280 281 begin := time.Now() 282 defer func() { 283 costs := time.Since(begin).Microseconds() 284 klog.Infof("[resource-rec] syncing vpa [%v/%v] costs %v us", namespace, name, costs) 285 _ = rrc.metricsEmitter.StoreInt64(metricNameRecommendControlVPASyncCosts, costs, metrics.MetricTypeNameRaw, 286 metrics.MetricTag{Key: "vpa_namespace", Val: namespace}, 287 metrics.MetricTag{Key: "vpa_name", Val: name}, 288 ) 289 }() 290 291 vpa, err := rrc.vpaLister.KatalystVerticalPodAutoscalers(namespace).Get(name) 292 if err != nil { 293 if errors.IsNotFound(err) { 294 klog.Warningf("[resource-rec] vpa %s/%s is not found", namespace, name) 295 return nil 296 } 297 298 klog.Errorf("[resource-rec] vpa %s/%s get error: %v", namespace, name, err) 299 return nil 300 } 301 klog.V(4).Infof("[resource-rec] syncing vpa %s", vpa.Name) 302 303 gvk := schema.FromAPIVersionAndKind(vpa.Spec.TargetRef.APIVersion, vpa.Spec.TargetRef.Kind) 304 workloadLister, ok := rrc.workloadLister[gvk] 305 if !ok { 306 klog.Errorf("[resource-rec] vpa %s/%s without workload lister", namespace, name) 307 return nil 308 } 309 310 recommender := vpa.Spec.ResourcePolicy.AlgorithmPolicy.Recommender 311 r, ok := algorithm.GetRecommender()[recommender] 312 if !ok { 313 klog.V(8).ErrorS(nil, fmt.Sprintf("[resource-rec] recommender %v not supported", recommender)) 314 return nil 315 } 316 317 pods, err := katalystutil.GetPodListForVPA(vpa, rrc.podIndexer, rrc.conf.VPAPodLabelIndexerKeys, workloadLister, rrc.podLister) 318 if err != nil { 319 klog.Errorf("[resource-rec] get pods for vpa %s/%s error: %v", namespace, name, err) 320 return nil 321 } 322 323 spd, err := katalystutil.GetSPDForVPA(vpa, rrc.spdIndexer, workloadLister, rrc.spdLister) 324 if err != nil { 325 klog.Warningf("[resource-rec] get spd for vpa %s/%s error: %v", namespace, name, err) 326 return nil 327 } 328 329 podResources, containerResources, err := r.GetRecommendedPodResources(spd, pods) 330 if err != nil { 331 klog.Errorf("[resource-rec] calculate resources for vpa %s/%s error: %v", namespace, name, err) 332 return nil 333 } 334 335 vpaRec, err := rrc.getOrCreateVpaRec(vpa) 336 if err != nil { 337 klog.Errorf("[resource-rec] get vpaRec for vpa %s/%s error: %v", namespace, name, err) 338 return nil 339 } 340 341 vpaRecNew := vpaRec.DeepCopy() 342 vpaRecNew.Spec.PodRecommendations = podResources 343 vpaRecNew.Spec.ContainerRecommendations = containerResources 344 err = rrc.vpaRecUpdater.PatchVPARecommendation(rrc.ctx, vpaRec, vpaRecNew) 345 if err != nil { 346 klog.Errorf("[resource-rec] get vpaRec for vpa %s/%s error: %v", namespace, name, err) 347 return nil 348 } 349 return nil 350 } 351 352 // cleanVPARec is mainly responsible to clean all vpaRec CR that should not exist 353 func (rrc *ResourceRecommendController) cleanVPARec() { 354 recList, err := rrc.vpaRecLister.List(labels.Everything()) 355 if err != nil { 356 klog.Errorf("[resource-rec] failed to list all vpaRec: %v", err) 357 } 358 359 for _, vpaRec := range recList { 360 needDelete := false 361 vpa, err := katalystutil.GetVPAForVPARec(vpaRec, rrc.vpaLister) 362 if err != nil { 363 if errors.IsNotFound(err) { 364 needDelete = true 365 } else { 366 klog.Errorf("[resource-rec] get vpa for vpaRec %s error: %v", vpaRec.Name, err) 367 } 368 } else { 369 // delete vpa-rec if the recommender field has already erased 370 recommender := vpa.Spec.ResourcePolicy.AlgorithmPolicy.Recommender 371 if recommender == "" { 372 needDelete = true 373 } 374 } 375 376 if needDelete { 377 klog.Warningf("[resource-rec] delete un-wanted vpaRec %v", vpaRec.Name) 378 if err := rrc.vpaRecUpdater.DeleteVPARecommendation(rrc.ctx, vpaRec, metav1.DeleteOptions{}); err != nil { 379 klog.Warningf("[resource-rec] delete un-wanted vpaRec %v err: %v", vpaRec.Name, err) 380 } 381 } 382 } 383 } 384 385 // getOrCreateVpaRec is used to main the in-tree vpaRec objects if it doesn't exist 386 func (rrc *ResourceRecommendController) getOrCreateVpaRec(vpa *apis.KatalystVerticalPodAutoscaler) (*apis.VerticalPodAutoscalerRecommendation, error) { 387 vpaRec, err := katalystutil.GetVPARecForVPA(vpa, rrc.vpaRecIndexer, rrc.vpaRecLister) 388 if err != nil { 389 if !apierrors.IsNotFound(err) { 390 return nil, err 391 } 392 } else { 393 return vpaRec, nil 394 } 395 396 klog.Errorf("[resource-rec] create vpaRec for vpa %s/%s", vpa.Namespace, vpa.Name) 397 ownerRef := metav1.OwnerReference{ 398 Name: vpa.GetName(), 399 Kind: vpa.GroupVersionKind().Kind, 400 APIVersion: vpa.GroupVersionKind().GroupVersion().String(), 401 UID: vpa.UID, 402 } 403 vpaRec = &apis.VerticalPodAutoscalerRecommendation{ 404 ObjectMeta: metav1.ObjectMeta{ 405 OwnerReferences: []metav1.OwnerReference{ownerRef}, 406 Namespace: vpa.GetNamespace(), 407 Name: vpa.GetName(), 408 Labels: general.DeepCopyMap(vpa.GetLabels()), 409 }, 410 Spec: apis.VerticalPodAutoscalerRecommendationSpec{}, 411 } 412 return rrc.vpaRecUpdater.CreateVPARecommendation(rrc.ctx, vpaRec, metav1.CreateOptions{}) 413 }