github.com/gocrane/crane@v0.11.0/pkg/recommendation/recommender/hpa/recommend.go (about) 1 package hpa 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "math" 7 "strconv" 8 "time" 9 10 "github.com/montanaflynn/stats" 11 autoscalingv2 "k8s.io/api/autoscaling/v2beta2" 12 corev1 "k8s.io/api/core/v1" 13 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 14 "k8s.io/klog/v2" 15 "sigs.k8s.io/yaml" 16 17 autoscalingapi "github.com/gocrane/api/autoscaling/v1alpha1" 18 predictionapi "github.com/gocrane/api/prediction/v1alpha1" 19 20 "github.com/gocrane/crane/pkg/common" 21 "github.com/gocrane/crane/pkg/metricnaming" 22 "github.com/gocrane/crane/pkg/prediction/config" 23 "github.com/gocrane/crane/pkg/recommend/types" 24 "github.com/gocrane/crane/pkg/recommendation/framework" 25 "github.com/gocrane/crane/pkg/utils" 26 ) 27 28 const callerFormat = "HPARecommendationCaller-%s-%s" 29 30 func (rr *HPARecommender) PreRecommend(ctx *framework.RecommendationContext) error { 31 return rr.ReplicasRecommender.PreRecommend(ctx) 32 } 33 34 func (rr *HPARecommender) Recommend(ctx *framework.RecommendationContext) error { 35 return rr.ReplicasRecommender.Recommend(ctx) 36 } 37 38 // Policy add some logic for result of recommend phase. 39 func (rr *HPARecommender) Policy(ctx *framework.RecommendationContext) error { 40 predictable := true 41 42 if len(ctx.ResultValues) != 1 { 43 klog.Warningf("%s: prediction metrics data is unexpected, List length is %d ", ctx.String(), len(ctx.ResultValues)) 44 predictable = false 45 } 46 47 if rr.PredictableEnabled && !predictable { 48 return fmt.Errorf("cannot predict target") 49 } 50 51 minReplicas, cpuMax, percentileCpu, err := rr.GetMinReplicas(ctx) 52 if err != nil { 53 return err 54 } 55 56 err = rr.checkMinCpuUsageThreshold(cpuMax) 57 if err != nil { 58 return fmt.Errorf("checkMinCpuUsageThreshold failed: %v", err) 59 } 60 61 medianMin, medianMax, err := rr.minMaxMedians(ctx.InputValue(string(corev1.ResourceCPU))) 62 if err != nil { 63 return fmt.Errorf("minMaxMedians failed: %v", err) 64 } 65 66 err = rr.checkFluctuation(medianMin, medianMax) 67 if err != nil { 68 return fmt.Errorf("%s checkFluctuation failed: %v", rr.Name(), err) 69 } 70 71 targetUtilization, _, err := rr.proposeTargetUtilization(ctx) 72 if err != nil { 73 return fmt.Errorf("proposeTargetUtilization failed: %v", err) 74 } 75 76 maxReplicas, err := rr.proposeMaxReplicas(&ctx.PodTemplate, percentileCpu, targetUtilization, minReplicas) 77 if err != nil { 78 return fmt.Errorf("proposeMaxReplicas failed: %v", err) 79 } 80 81 defaultPredictionWindow := int32(3600) 82 resourceCpu := corev1.ResourceCPU 83 84 proposedEHPA := &types.EffectiveHorizontalPodAutoscalerRecommendation{ 85 MaxReplicas: &maxReplicas, 86 MinReplicas: &minReplicas, 87 Metrics: []autoscalingv2.MetricSpec{ 88 { 89 Type: autoscalingv2.ResourceMetricSourceType, 90 Resource: &autoscalingv2.ResourceMetricSource{ 91 Name: resourceCpu, 92 Target: autoscalingv2.MetricTarget{ 93 Type: autoscalingv2.UtilizationMetricType, 94 AverageUtilization: &targetUtilization, 95 }, 96 }, 97 }, 98 }, 99 } 100 101 if predictable { 102 proposedEHPA.Prediction = &autoscalingapi.Prediction{ 103 PredictionWindowSeconds: &defaultPredictionWindow, 104 PredictionAlgorithm: &autoscalingapi.PredictionAlgorithm{ 105 AlgorithmType: predictionapi.AlgorithmTypeDSP, 106 DSP: ctx.AlgorithmConfig.DSP, 107 }, 108 } 109 } 110 111 // get metric spec from existing hpa and use them 112 if rr.ReferenceHpaEnabled && ctx.HPA != nil { 113 for _, metricSpec := range ctx.HPA.Spec.Metrics { 114 // don't use resource cpu, since we already configuration it before 115 if metricSpec.Type == autoscalingv2.ResourceMetricSourceType && metricSpec.Resource != nil && metricSpec.Resource.Name == resourceCpu { 116 continue 117 } 118 119 proposedEHPA.Metrics = append(proposedEHPA.Metrics, metricSpec) 120 } 121 } 122 123 result := types.ProposedRecommendation{ 124 EffectiveHPA: proposedEHPA, 125 } 126 127 resultBytes, err := yaml.Marshal(result) 128 if err != nil { 129 return fmt.Errorf("%s marshal result failed: %v", rr.Name(), err) 130 } 131 132 ctx.Recommendation.Status.RecommendedValue = string(resultBytes) 133 if ctx.EHPA == nil { 134 ctx.Recommendation.Status.Action = "Create" 135 136 newEhpa := &autoscalingapi.EffectiveHorizontalPodAutoscaler{ 137 TypeMeta: metav1.TypeMeta{ 138 Kind: "EffectiveHorizontalPodAutoscaler", 139 APIVersion: autoscalingapi.GroupVersion.String(), 140 }, 141 ObjectMeta: metav1.ObjectMeta{ 142 Namespace: ctx.Recommendation.Spec.TargetRef.Namespace, 143 Name: ctx.Recommendation.Spec.TargetRef.Name, 144 }, 145 Spec: autoscalingapi.EffectiveHorizontalPodAutoscalerSpec{ 146 MinReplicas: proposedEHPA.MinReplicas, 147 MaxReplicas: *proposedEHPA.MaxReplicas, 148 Metrics: proposedEHPA.Metrics, 149 ScaleStrategy: autoscalingapi.ScaleStrategyPreview, 150 Prediction: proposedEHPA.Prediction, 151 ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{ 152 Kind: ctx.Recommendation.Spec.TargetRef.Kind, 153 APIVersion: ctx.Recommendation.Spec.TargetRef.APIVersion, 154 Name: ctx.Recommendation.Spec.TargetRef.Name, 155 }, 156 }, 157 } 158 159 newEhpaBytes, err := json.Marshal(newEhpa) 160 if err != nil { 161 return fmt.Errorf("marshal ehpa failed %s. ", err) 162 } 163 ctx.Recommendation.Status.RecommendedInfo = string(newEhpaBytes) 164 } else { 165 ctx.Recommendation.Status.Action = "Patch" 166 167 patchEhpa := &autoscalingapi.EffectiveHorizontalPodAutoscaler{ 168 Spec: autoscalingapi.EffectiveHorizontalPodAutoscalerSpec{ 169 MinReplicas: proposedEHPA.MinReplicas, 170 MaxReplicas: *proposedEHPA.MaxReplicas, 171 Metrics: proposedEHPA.Metrics, 172 }, 173 } 174 175 patchEhpaBytes, err := json.Marshal(patchEhpa) 176 if err != nil { 177 return fmt.Errorf("marshal ehpa failed %s. ", err) 178 } 179 ctx.Recommendation.Status.RecommendedInfo = string(patchEhpaBytes) 180 ctx.Recommendation.Status.TargetRef = corev1.ObjectReference{ 181 Namespace: ctx.Recommendation.Spec.TargetRef.Namespace, 182 Name: ctx.Recommendation.Spec.TargetRef.Name, 183 Kind: "EffectiveHorizontalPodAutoscaler", 184 APIVersion: autoscalingapi.GroupVersion.String(), 185 } 186 } 187 188 return nil 189 } 190 191 // checkMinCpuUsageThreshold check if the max cpu for target is reach to replicas.min-cpu-usage-threshold 192 func (rr *HPARecommender) checkMinCpuUsageThreshold(cpuMax float64) error { 193 klog.V(4).Infof("%s checkMinCpuUsageThreshold, cpuMax %f threshold %f", rr.Name(), cpuMax, rr.MinCpuUsageThreshold) 194 if cpuMax < rr.MinCpuUsageThreshold { 195 return fmt.Errorf("target cpuusage %f is under replicas.min-cpu-usage-threshold %f. ", cpuMax, rr.MinCpuUsageThreshold) 196 } 197 198 return nil 199 } 200 201 func (rr *HPARecommender) minMaxMedians(predictionTs []*common.TimeSeries) (float64, float64, error) { 202 // aggregate with time's hour 203 cpuUsagePredictionMap := make(map[int][]float64) 204 for _, sample := range predictionTs[0].Samples { 205 sampleTime := time.Unix(sample.Timestamp, 0) 206 if _, exist := cpuUsagePredictionMap[sampleTime.Hour()]; exist { 207 cpuUsagePredictionMap[sampleTime.Hour()] = append(cpuUsagePredictionMap[sampleTime.Hour()], sample.Value) 208 } else { 209 newUsageInHour := make([]float64, 0) 210 newUsageInHour = append(newUsageInHour, sample.Value) 211 cpuUsagePredictionMap[sampleTime.Hour()] = newUsageInHour 212 } 213 } 214 215 // use median to deburring data 216 var medianUsages []float64 217 for _, usageInHour := range cpuUsagePredictionMap { 218 medianUsage, err := stats.Median(usageInHour) 219 if err != nil { 220 return 0., 0., err 221 } 222 medianUsages = append(medianUsages, medianUsage) 223 } 224 225 medianMax := math.SmallestNonzeroFloat64 226 medianMin := math.MaxFloat64 227 for _, value := range medianUsages { 228 if value > medianMax { 229 medianMax = value 230 } 231 232 if value < medianMin { 233 medianMin = value 234 } 235 } 236 237 klog.V(4).Infof("%s minMaxMedians medianMax %f, medianMin %f, medianUsages %v", rr.Name(), medianMax, medianMin, medianUsages) 238 239 return medianMin, medianMax, nil 240 } 241 242 // checkFluctuation check if the time series fluctuation is reach to replicas.fluctuation-threshold 243 func (rr *HPARecommender) checkFluctuation(medianMin, medianMax float64) error { 244 fluctuationThreshold, err := strconv.ParseFloat(rr.Config["fluctuation-threshold"], 64) 245 if err != nil { 246 return err 247 } 248 249 if medianMin == 0 { 250 medianMin = 0.1 // use a small value to continue calculate 251 } 252 253 fluctuation := medianMax / medianMin 254 if fluctuation < fluctuationThreshold { 255 return fmt.Errorf("target cpu fluctuation %f is under replicas.fluctuation-threshold %f. ", fluctuation, fluctuationThreshold) 256 } 257 258 return nil 259 } 260 261 // proposeTargetUtilization use the 99 percentile cpu usage to propose target utilization, 262 // since we think if pod have reach the top usage before, maybe this is a suitable target to running. 263 // Considering too high or too low utilization are both invalid, we will be capping target utilization finally. 264 func (rr *HPARecommender) proposeTargetUtilization(ctx *framework.RecommendationContext) (int32, int64, error) { 265 percentilePredictor := ctx.PredictorMgr.GetPredictor(predictionapi.AlgorithmTypePercentile) 266 267 var cpuUsage float64 268 // use percentile algo to get the 99 percentile cpu usage for this target 269 for _, container := range ctx.PodTemplate.Spec.Containers { 270 caller := fmt.Sprintf(callerFormat, klog.KObj(ctx.Recommendation), ctx.Recommendation.UID) 271 metricNamer := metricnaming.ResourceToContainerMetricNamer(ctx.Recommendation.Spec.TargetRef.Namespace, ctx.Recommendation.Spec.TargetRef.APIVersion, 272 ctx.Recommendation.Spec.TargetRef.Kind, ctx.Recommendation.Spec.TargetRef.Name, container.Name, corev1.ResourceCPU, caller) 273 cpuConfig := &config.Config{ 274 Percentile: &predictionapi.Percentile{ 275 Aggregated: true, 276 HistoryLength: "168h", 277 SampleInterval: "1m", 278 MarginFraction: "0.15", 279 TargetUtilization: "1.0", 280 Percentile: "0.99", 281 Histogram: predictionapi.HistogramConfig{ 282 HalfLife: "24h", 283 BucketSize: "0.1", 284 MaxValue: "100", 285 }, 286 }, 287 } 288 tsList, err := utils.QueryPredictedValuesOnce(ctx.Recommendation, 289 percentilePredictor, 290 caller, 291 cpuConfig, 292 metricNamer) 293 if err != nil { 294 return 0, 0, err 295 } 296 if len(tsList) < 1 || len(tsList[0].Samples) < 1 { 297 return 0, 0, fmt.Errorf("no value retured for queryExpr: %s", metricNamer.BuildUniqueKey()) 298 } 299 cpuUsage += tsList[0].Samples[0].Value 300 } 301 302 requestTotal, err := utils.CalculatePodTemplateRequests(&ctx.PodTemplate, corev1.ResourceCPU) 303 if err != nil { 304 return 0, 0, err 305 } 306 307 klog.V(4).Infof("propose targetUtilization, cpuUsage %f requestsPod %d", cpuUsage, requestTotal) 308 targetUtilization := int32(math.Ceil((cpuUsage * 1000 / float64(requestTotal)) * 100)) 309 310 // capping 311 if targetUtilization < int32(rr.MinCpuTargetUtilization) { 312 targetUtilization = int32(rr.MinCpuTargetUtilization) 313 } 314 315 // capping 316 if targetUtilization > int32(rr.MaxCpuTargetUtilization) { 317 targetUtilization = int32(rr.MaxCpuTargetUtilization) 318 } 319 320 return targetUtilization, requestTotal, nil 321 } 322 323 // proposeMaxReplicas use max cpu usage to compare with target pod cpu usage to get the max replicas. 324 func (rr *HPARecommender) proposeMaxReplicas(podTemplate *corev1.PodTemplateSpec, percentileCpu float64, targetUtilization int32, minReplicas int32) (int32, error) { 325 requestsPod, err := utils.CalculatePodTemplateRequests(podTemplate, corev1.ResourceCPU) 326 if err != nil { 327 return 0, err 328 } 329 330 klog.V(4).Infof("proposeMaxReplicas, percentileCpu %f requestsPod %d targetUtilization %d", percentileCpu, requestsPod, targetUtilization) 331 332 // request * targetUtilization is the target average cpu usage, use total p95thCpu to divide, we can get the expect max replicas. 333 calcMaxReplicas := (percentileCpu * 100 * 1000 * rr.MaxReplicasFactor) / float64(int32(requestsPod)*targetUtilization) 334 maxReplicas := int32(math.Ceil(calcMaxReplicas)) 335 336 // maxReplicas should be always larger than minReplicas 337 if maxReplicas < minReplicas { 338 maxReplicas = minReplicas 339 } 340 341 return maxReplicas, nil 342 }