github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/vpa/vpa_status.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package vpa 18 19 import ( 20 "context" 21 "fmt" 22 "time" 23 24 v1 "k8s.io/api/core/v1" 25 apiequality "k8s.io/apimachinery/pkg/api/equality" 26 "k8s.io/apimachinery/pkg/api/errors" 27 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 "k8s.io/apimachinery/pkg/runtime/schema" 29 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 30 "k8s.io/apimachinery/pkg/util/wait" 31 corelisters "k8s.io/client-go/listers/core/v1" 32 "k8s.io/client-go/tools/cache" 33 "k8s.io/client-go/util/workqueue" 34 "k8s.io/klog/v2" 35 36 apis "github.com/kubewharf/katalyst-api/pkg/apis/autoscaling/v1alpha1" 37 autoscalelister "github.com/kubewharf/katalyst-api/pkg/client/listers/autoscaling/v1alpha1" 38 katalyst_base "github.com/kubewharf/katalyst-core/cmd/base" 39 "github.com/kubewharf/katalyst-core/pkg/client/control" 40 "github.com/kubewharf/katalyst-core/pkg/config/controller" 41 "github.com/kubewharf/katalyst-core/pkg/controller/vpa/util" 42 "github.com/kubewharf/katalyst-core/pkg/metrics" 43 katalystutil "github.com/kubewharf/katalyst-core/pkg/util" 44 "github.com/kubewharf/katalyst-core/pkg/util/native" 45 ) 46 47 const ( 48 metricNameVAPControlVPAUpdateStatusCosts = "vpa_vpa_update_resource_costs" 49 ) 50 51 type vpaStatusController struct { 52 ctx context.Context 53 conf *controller.VPAConfig 54 55 vpaIndexer cache.Indexer 56 podIndexer cache.Indexer 57 58 podLister corelisters.PodLister 59 vpaLister autoscalelister.KatalystVerticalPodAutoscalerLister 60 61 syncedFunc []cache.InformerSynced 62 vpaSyncQueue workqueue.RateLimitingInterface 63 vpaStatusSyncWorkers int 64 65 workloadLister map[schema.GroupVersionKind]cache.GenericLister 66 67 vpaUpdater control.VPAUpdater 68 69 metricsEmitter metrics.MetricEmitter 70 } 71 72 func newVPAStatusController(ctx context.Context, controlCtx *katalyst_base.GenericContext, 73 conf *controller.VPAConfig, workloadLister map[schema.GroupVersionKind]cache.GenericLister, 74 vpaUpdater control.VPAUpdater, 75 ) *vpaStatusController { 76 podInformer := controlCtx.KubeInformerFactory.Core().V1().Pods() 77 vpaInformer := controlCtx.InternalInformerFactory.Autoscaling().V1alpha1().KatalystVerticalPodAutoscalers() 78 79 c := &vpaStatusController{ 80 ctx: ctx, 81 conf: conf, 82 vpaIndexer: vpaInformer.Informer().GetIndexer(), 83 vpaLister: vpaInformer.Lister(), 84 podIndexer: podInformer.Informer().GetIndexer(), 85 podLister: podInformer.Lister(), 86 syncedFunc: []cache.InformerSynced{ 87 podInformer.Informer().HasSynced, 88 vpaInformer.Informer().HasSynced, 89 }, 90 vpaSyncQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "vpa-status"), 91 vpaStatusSyncWorkers: conf.VPASyncWorkers, 92 vpaUpdater: vpaUpdater, 93 workloadLister: workloadLister, 94 metricsEmitter: controlCtx.EmitterPool.GetDefaultMetricsEmitter().WithTags("vpa-status"), 95 } 96 97 // we need update current container resource to vpa status, 98 // so we need watch pod update event (if the in-place updating succeeded) 99 podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 100 AddFunc: c.addPod, 101 UpdateFunc: c.updatePod, 102 }) 103 104 vpaInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 105 AddFunc: c.addVPA, 106 UpdateFunc: c.updateVPA, 107 }) 108 109 return c 110 } 111 112 func (vs *vpaStatusController) run() { 113 defer utilruntime.HandleCrash() 114 defer vs.vpaSyncQueue.ShutDown() 115 116 defer klog.Infof("[vpa-status] shutting down vpa status collector") 117 118 if !cache.WaitForCacheSync(vs.ctx.Done(), vs.syncedFunc...) { 119 utilruntime.HandleError(fmt.Errorf("unable to sync caches for vpa status collector")) 120 return 121 } 122 123 klog.Infof("[vpa-status] caches are synced for vpa status collector") 124 125 for i := 0; i < vs.vpaStatusSyncWorkers; i++ { 126 go wait.Until(vs.vpaWorker, time.Second, vs.ctx.Done()) 127 } 128 129 <-vs.ctx.Done() 130 } 131 132 func (vs *vpaStatusController) addVPA(obj interface{}) { 133 v, ok := obj.(*apis.KatalystVerticalPodAutoscaler) 134 if !ok { 135 klog.Errorf("[vpa-status] cannot convert obj to *apis.VerticalPodAutoscaler: %v", obj) 136 return 137 } 138 139 klog.V(6).Infof("[vpa-status] notice addition of VerticalPodAutoscaler %s", v.Name) 140 vs.enqueueVPA(v) 141 } 142 143 func (vs *vpaStatusController) updateVPA(old, cur interface{}) { 144 oldVPA, ok := old.(*apis.KatalystVerticalPodAutoscaler) 145 if !ok { 146 klog.Errorf("[vpa-status] cannot convert oldObj to *apis.VerticalPodAutoscaler: %v", old) 147 return 148 } 149 150 curVPA, ok := cur.(*apis.KatalystVerticalPodAutoscaler) 151 if !ok { 152 klog.Errorf("[vpa-status] cannot convert curObj to *apis.VerticalPodAutoscaler: %v", cur) 153 return 154 } 155 156 if apiequality.Semantic.DeepEqual(oldVPA.Status, curVPA.Status) { 157 return 158 } 159 160 klog.V(6).Infof("[vpa-status] notice update of vpa %s", native.GenerateUniqObjectNameKey(curVPA)) 161 vs.enqueueVPA(curVPA) 162 } 163 164 func (vs *vpaStatusController) addPod(obj interface{}) { 165 pod, ok := obj.(*v1.Pod) 166 if !ok { 167 klog.Errorf("[vpa-status] cannot convert obj to *core.Pod: %v", obj) 168 return 169 } 170 171 vpa, err := katalystutil.GetVPAForPod(pod, vs.vpaIndexer, vs.workloadLister, vs.vpaLister) 172 if err != nil { 173 klog.V(6).Infof("[vpa-status] didn't to find vpa of pod %s, err: %v", native.GenerateUniqObjectNameKey(pod), err) 174 return 175 } 176 177 klog.V(6).Infof("[vpa-status] notice addition of pod %s", native.GenerateUniqObjectNameKey(pod)) 178 vs.enqueueVPA(vpa) 179 } 180 181 func (vs *vpaStatusController) updatePod(old interface{}, cur interface{}) { 182 oldPod, ok := old.(*v1.Pod) 183 if !ok { 184 klog.Errorf("[vpa-status] cannot convert obj to *core.Pod: %v", cur) 185 return 186 } 187 188 curPod, ok := cur.(*v1.Pod) 189 if !ok { 190 klog.Errorf("[vpa-status] cannot convert obj to *core.Pod: %v", cur) 191 return 192 } 193 194 // only when pod status or spec.containers has been changed, in-place update resource may be completed, 195 // so it only enqueue vpa to collect vpa status when they are different with old one 196 if apiequality.Semantic.DeepEqual(oldPod.Status, curPod.Status) && 197 apiequality.Semantic.DeepEqual(oldPod.Spec.Containers, curPod.Spec.Containers) { 198 return 199 } 200 201 vpa, err := katalystutil.GetVPAForPod(curPod, vs.vpaIndexer, vs.workloadLister, vs.vpaLister) 202 if err != nil { 203 klog.V(6).Infof("[vpa-status] didn't to find vpa of pod %s, err: %v", 204 native.GenerateUniqObjectNameKey(curPod), err) 205 return 206 } 207 208 klog.V(6).Infof("[vpa-status] notice update of pod %s", native.GenerateUniqObjectNameKey(curPod)) 209 vs.enqueueVPA(vpa) 210 } 211 212 func (vs *vpaStatusController) enqueueVPA(vpa *apis.KatalystVerticalPodAutoscaler) { 213 if vpa == nil { 214 klog.Warning("[vpa-status] trying to enqueueVPA a nil VPA") 215 return 216 } 217 218 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(vpa) 219 if err != nil { 220 utilruntime.HandleError(fmt.Errorf("couldn't get key for object %#v: %v", vpa, err)) 221 return 222 } 223 224 vs.vpaSyncQueue.Add(key) 225 } 226 227 func (vs *vpaStatusController) vpaWorker() { 228 for vs.processNextVPA() { 229 } 230 } 231 232 func (vs *vpaStatusController) processNextVPA() bool { 233 key, quit := vs.vpaSyncQueue.Get() 234 if quit { 235 return false 236 } 237 defer vs.vpaSyncQueue.Done(key) 238 239 err := vs.syncVPA(key.(string)) 240 if err == nil { 241 vs.vpaSyncQueue.Forget(key) 242 return true 243 } 244 245 utilruntime.HandleError(fmt.Errorf("sync %q failed with %v", key, err)) 246 vs.vpaSyncQueue.AddRateLimited(key) 247 248 return true 249 } 250 251 func (vs *vpaStatusController) syncVPA(key string) error { 252 namespace, name, err := cache.SplitMetaNamespaceKey(key) 253 if err != nil { 254 klog.Errorf("[vpa-status] failed to split namespace and name from key %s", key) 255 return err 256 } 257 258 begin := time.Now() 259 defer func() { 260 now := time.Now() 261 costs := now.Sub(begin).Microseconds() 262 klog.V(3).Infof("[vpa-status] [%v/%v] %v costs %v us", namespace, name, metricNameVAPControlVPAUpdateStatusCosts, costs) 263 _ = vs.metricsEmitter.StoreInt64(metricNameVAPControlVPAUpdateStatusCosts, costs, metrics.MetricTypeNameRaw, []metrics.MetricTag{ 264 {Key: "vpa_namespace", Val: namespace}, 265 {Key: "vpa_name", Val: name}, 266 }...) 267 }() 268 269 vpa, err := vs.vpaLister.KatalystVerticalPodAutoscalers(namespace).Get(name) 270 if err != nil { 271 klog.Errorf("[vpa-status] vpa %s/%s get error: %v", namespace, name, err) 272 if errors.IsNotFound(err) { 273 return nil 274 } 275 return err 276 } 277 278 gvk := schema.FromAPIVersionAndKind(vpa.Spec.TargetRef.APIVersion, vpa.Spec.TargetRef.Kind) 279 workloadLister, ok := vs.workloadLister[gvk] 280 if !ok { 281 klog.Errorf("[vpa-status] vpa %s/%s without workload lister %v", namespace, name, gvk) 282 return nil 283 } 284 285 pods, err := katalystutil.GetPodListForVPA(vpa, vs.podIndexer, vs.conf.VPAPodLabelIndexerKeys, workloadLister, vs.podLister) 286 if err != nil { 287 klog.Errorf("[vpa-status] failed to get pods by vpa %s, err %v", vpa.Name, err) 288 return err 289 } 290 291 // get pod resources and container resources according to current pods 292 vpaPodResources, vpaContainerResources, err := util.GetVPAResourceStatusWithCurrent(vpa, pods) 293 if err != nil { 294 klog.Errorf("[vpa-status] get vpa status with current pods err: %v", err) 295 return err 296 } 297 298 vpaNew := vpa.DeepCopy() 299 vpaNew.Status.PodResources = vpaPodResources 300 vpaNew.Status.ContainerResources = vpaContainerResources 301 302 // set RecommendationApplied condition, based on whether all pods for this vpa 303 // are updated to the expected resources in their annotations 304 err = vs.setRecommendationAppliedCondition(vpaNew, pods) 305 if err != nil { 306 klog.Errorf("[vpa-status] set recommendation applied condition failed: %v", err) 307 return err 308 } 309 310 // skip to update status if no change happened 311 if apiequality.Semantic.DeepEqual(vpa.Status, vpaNew.Status) { 312 return nil 313 } 314 315 _, err = vs.vpaUpdater.UpdateVPAStatus(vs.ctx, vpaNew, metav1.UpdateOptions{}) 316 if err != nil { 317 klog.Errorf("[vpa-status] update vpa status err: %v", err) 318 return err 319 } 320 321 return nil 322 } 323 324 // setRecommendationAppliedCondition set vpa recommendation applied condition by checking all pods whether 325 // are updated to the expected resources in their annotations 326 func (vs *vpaStatusController) setRecommendationAppliedCondition(vpa *apis.KatalystVerticalPodAutoscaler, pods []*v1.Pod) error { 327 failedCount := 0 328 for _, pod := range pods { 329 if !katalystutil.CheckPodSpecUpdated(pod) { 330 failedCount += 1 331 } 332 } 333 334 if failedCount == 0 { 335 err := util.SetVPAConditions(vpa, apis.RecommendationApplied, v1.ConditionTrue, util.VPAConditionReasonPodSpecUpdated, "") 336 if err != nil { 337 return err 338 } 339 } else { 340 msg := fmt.Sprintf("failed to update %d pods, total %d pods", failedCount, len(pods)) 341 err := util.SetVPAConditions(vpa, apis.RecommendationApplied, v1.ConditionFalse, util.VPAConditionReasonPodSpecNoUpdate, msg) 342 if err != nil { 343 return err 344 } 345 } 346 return nil 347 }