github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/resource-recommend/recommender/recommenders/percentile_recommender.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package recommenders 18 19 import ( 20 "strings" 21 "time" 22 23 v1 "k8s.io/api/core/v1" 24 "k8s.io/apimachinery/pkg/api/resource" 25 vpamodel "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model" 26 "k8s.io/klog/v2" 27 28 "github.com/kubewharf/katalyst-api/pkg/apis/recommendation/v1alpha1" 29 "github.com/kubewharf/katalyst-core/pkg/controller/resource-recommend/oom" 30 "github.com/kubewharf/katalyst-core/pkg/controller/resource-recommend/processor" 31 "github.com/kubewharf/katalyst-core/pkg/controller/resource-recommend/recommender" 32 "github.com/kubewharf/katalyst-core/pkg/util/general" 33 errortypes "github.com/kubewharf/katalyst-core/pkg/util/resource-recommend/types/error" 34 processortypes "github.com/kubewharf/katalyst-core/pkg/util/resource-recommend/types/processor" 35 recommendationtype "github.com/kubewharf/katalyst-core/pkg/util/resource-recommend/types/recommendation" 36 ) 37 38 type PercentileRecommender struct { 39 recommender.Recommender 40 DataProcessor processor.Processor 41 OomRecorder oom.Recorder 42 } 43 44 const ( 45 // OOMBumpUpRatio specifies how much memory will be added after observing OOM. 46 OOMBumpUpRatio float64 = 1.2 47 // OOMMinBumpUp specifies minimal increase of memory after observing OOM. 48 OOMMinBumpUp float64 = 100 * 1024 * 1024 // 100MB 49 ) 50 51 // NewPercentileRecommender returns a 52 func NewPercentileRecommender(DataProcessor processor.Processor, OomRecorder oom.Recorder) *PercentileRecommender { 53 return &PercentileRecommender{ 54 DataProcessor: DataProcessor, 55 OomRecorder: OomRecorder, 56 } 57 } 58 59 func (r *PercentileRecommender) Recommend(recommendation *recommendationtype.Recommendation) *errortypes.CustomError { 60 klog.InfoS("starting recommenders process", "recommendationConfig", recommendation.Config) 61 for _, container := range recommendation.Config.Containers { 62 containerRecommendation := v1alpha1.ContainerResources{ 63 ContainerName: container.ContainerName, 64 } 65 requests := v1alpha1.ContainerResourceList{ 66 Target: map[v1.ResourceName]resource.Quantity{}, 67 } 68 for _, containerConfig := range container.ContainerConfigs { 69 taskKey := processortypes.GetProcessKey(recommendation.NamespacedName, recommendation.Config.TargetRef, container.ContainerName, containerConfig.ControlledResource) 70 switch containerConfig.ControlledResource { 71 case v1.ResourceCPU: 72 cpuQuantity, err := r.getCpuTargetPercentileEstimationWithUsageBuffer(&taskKey, float64(containerConfig.ResourceBufferPercent)/100) 73 if err != nil { 74 return errortypes.RecommendationNotReadyError(err.Error()) 75 } 76 klog.InfoS("got recommended cpu for container", "recommendedCPU", cpuQuantity.String(), "container", container.ContainerName) 77 requests.Target[v1.ResourceCPU] = *cpuQuantity 78 case v1.ResourceMemory: 79 memQuantity, err := r.getMemTargetPercentileEstimationWithUsageBuffer(&taskKey, float64(containerConfig.ResourceBufferPercent)/100) 80 if err != nil { 81 return errortypes.RecommendationNotReadyError(err.Error()) 82 } 83 requests.Target[v1.ResourceMemory] = *memQuantity 84 } 85 } 86 containerRecommendation.Requests = &requests 87 recommendation.Recommendations = append(recommendation.Recommendations, containerRecommendation) 88 } 89 klog.InfoS("recommenders process done", "recommendation", general.StructToString(recommendation.Recommendations)) 90 return nil 91 } 92 93 func (r *PercentileRecommender) ScaleOnOOM(oomRecords []oom.OOMRecord, namespace string, workloadName string, containerName string) *resource.Quantity { 94 klog.InfoS("scaling on oom for namespace, workload, container", "namespace", namespace, "workload", workloadName, "container", containerName) 95 var oomRecord *oom.OOMRecord 96 for _, record := range oomRecords { 97 // use oomRecord for all pods in workload 98 if strings.HasPrefix(record.Pod, workloadName) && containerName == record.Container && namespace == record.Namespace { 99 oomRecord = &record 100 break 101 } 102 } 103 104 // ignore too old oom events 105 if oomRecord != nil && time.Since(oomRecord.OOMAt) <= (time.Hour*24*7) { 106 memoryOOM := oomRecord.Memory.Value() 107 var memoryNeeded vpamodel.ResourceAmount 108 memoryNeeded = vpamodel.ResourceAmountMax(vpamodel.ResourceAmount(memoryOOM)+vpamodel.MemoryAmountFromBytes(OOMMinBumpUp), 109 vpamodel.ScaleResource(vpamodel.ResourceAmount(memoryOOM), OOMBumpUpRatio)) 110 111 return r.getMemQuantity(float64(memoryNeeded)) 112 } 113 114 return nil 115 } 116 117 func (r *PercentileRecommender) getCpuTargetPercentileEstimationWithUsageBuffer(taskKey *processortypes.ProcessKey, resourceBufferPercentage float64) (quantity *resource.Quantity, err error) { 118 klog.InfoS("getting cpu estimation for namespace, workload, container, with resource buffer", "namespace", taskKey.Namespace, "workload", taskKey.WorkloadName, "container", taskKey.ContainerName, "resourceBuffer", resourceBufferPercentage) 119 cpuRecommendedValue, err := r.DataProcessor.QueryProcessedValues(taskKey) 120 if err != nil { 121 return nil, err 122 } 123 klog.InfoS("got cpu recommended value from processor", "cpuRecommendedValue", cpuRecommendedValue) 124 // scale cpu resource based on usageBuffer 125 cpuRecommendedValue = cpuRecommendedValue * (1 + resourceBufferPercentage) 126 klog.InfoS("scaled cpu recommended value for container", "container", taskKey.ContainerName, "resourceBuffer", resourceBufferPercentage, "cpuRecommendedValue", cpuRecommendedValue) 127 cpuQuantity := resource.NewMilliQuantity(int64(cpuRecommendedValue*1000), resource.DecimalSI) 128 return cpuQuantity, nil 129 } 130 131 func (r *PercentileRecommender) getMemTargetPercentileEstimationWithUsageBuffer(taskKey *processortypes.ProcessKey, resourceBufferPercentage float64) (quantity *resource.Quantity, err error) { 132 klog.InfoS("getting mem estimation for namespace, workload, container, with resource buffer", "namespace", taskKey.Namespace, "workload", taskKey.WorkloadName, "container", taskKey.ContainerName, "resourceBuffer", resourceBufferPercentage) 133 memRecommendedValue, err := r.DataProcessor.QueryProcessedValues(taskKey) 134 if err != nil { 135 return nil, err 136 } 137 klog.InfoS("got mem recommended value from processor", "memRecommendedValue", memRecommendedValue) 138 // scale mem resource based on usageBuffer 139 memRecommendedValue = memRecommendedValue * (1 + resourceBufferPercentage) 140 klog.InfoS("scaled mem recommended value for container", "container", taskKey.ContainerName, "resourceBuffer", resourceBufferPercentage, "memRecommendedValue", memRecommendedValue) 141 memQuantity := r.getMemQuantity(memRecommendedValue) 142 klog.InfoS("got recommended memory for container", "container", taskKey.ContainerName, "memory", memQuantity.String()) 143 oomRecords := r.OomRecorder.ListOOMRecords() 144 oomScaledMem := r.ScaleOnOOM(oomRecords, taskKey.Namespace, taskKey.WorkloadName, taskKey.ContainerName) 145 if oomScaledMem != nil && !oomScaledMem.IsZero() && oomScaledMem.Cmp(*memQuantity) > 0 { 146 klog.InfoS("container using oomProtect Memory", "container", taskKey.ContainerName, "oomScaledMem", oomScaledMem.String()) 147 memQuantity = oomScaledMem 148 } 149 return memQuantity, nil 150 } 151 152 func (r *PercentileRecommender) getMemQuantity(memRecommendedValue float64) (quantity *resource.Quantity) { 153 scale := int64(1) 154 quotient := int64(memRecommendedValue) 155 remainder := int64(0) 156 for scale < 1024*1024 { 157 if quotient < 1024 { 158 break 159 } 160 scale *= 1024 161 quotient = int64(memRecommendedValue) / scale 162 remainder = int64(memRecommendedValue) % scale 163 } 164 if remainder == 0 { 165 return resource.NewQuantity(quotient*scale, resource.BinarySI) 166 } 167 return resource.NewQuantity((quotient+1)*scale, resource.BinarySI) 168 }