github.com/kubewharf/katalyst-core@v0.5.3/pkg/controller/resource-recommend/recommender/recommenders/percentile_recommender.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package recommenders
    18  
    19  import (
    20  	"strings"
    21  	"time"
    22  
    23  	v1 "k8s.io/api/core/v1"
    24  	"k8s.io/apimachinery/pkg/api/resource"
    25  	vpamodel "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model"
    26  	"k8s.io/klog/v2"
    27  
    28  	"github.com/kubewharf/katalyst-api/pkg/apis/recommendation/v1alpha1"
    29  	"github.com/kubewharf/katalyst-core/pkg/controller/resource-recommend/oom"
    30  	"github.com/kubewharf/katalyst-core/pkg/controller/resource-recommend/processor"
    31  	"github.com/kubewharf/katalyst-core/pkg/controller/resource-recommend/recommender"
    32  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    33  	errortypes "github.com/kubewharf/katalyst-core/pkg/util/resource-recommend/types/error"
    34  	processortypes "github.com/kubewharf/katalyst-core/pkg/util/resource-recommend/types/processor"
    35  	recommendationtype "github.com/kubewharf/katalyst-core/pkg/util/resource-recommend/types/recommendation"
    36  )
    37  
    38  type PercentileRecommender struct {
    39  	recommender.Recommender
    40  	DataProcessor processor.Processor
    41  	OomRecorder   oom.Recorder
    42  }
    43  
    44  const (
    45  	// OOMBumpUpRatio specifies how much memory will be added after observing OOM.
    46  	OOMBumpUpRatio float64 = 1.2
    47  	// OOMMinBumpUp specifies minimal increase of memory after observing OOM.
    48  	OOMMinBumpUp float64 = 100 * 1024 * 1024 // 100MB
    49  )
    50  
    51  // NewPercentileRecommender returns a
    52  func NewPercentileRecommender(DataProcessor processor.Processor, OomRecorder oom.Recorder) *PercentileRecommender {
    53  	return &PercentileRecommender{
    54  		DataProcessor: DataProcessor,
    55  		OomRecorder:   OomRecorder,
    56  	}
    57  }
    58  
    59  func (r *PercentileRecommender) Recommend(recommendation *recommendationtype.Recommendation) *errortypes.CustomError {
    60  	klog.InfoS("starting recommenders process", "recommendationConfig", recommendation.Config)
    61  	for _, container := range recommendation.Config.Containers {
    62  		containerRecommendation := v1alpha1.ContainerResources{
    63  			ContainerName: container.ContainerName,
    64  		}
    65  		requests := v1alpha1.ContainerResourceList{
    66  			Target: map[v1.ResourceName]resource.Quantity{},
    67  		}
    68  		for _, containerConfig := range container.ContainerConfigs {
    69  			taskKey := processortypes.GetProcessKey(recommendation.NamespacedName, recommendation.Config.TargetRef, container.ContainerName, containerConfig.ControlledResource)
    70  			switch containerConfig.ControlledResource {
    71  			case v1.ResourceCPU:
    72  				cpuQuantity, err := r.getCpuTargetPercentileEstimationWithUsageBuffer(&taskKey, float64(containerConfig.ResourceBufferPercent)/100)
    73  				if err != nil {
    74  					return errortypes.RecommendationNotReadyError(err.Error())
    75  				}
    76  				klog.InfoS("got recommended cpu for container", "recommendedCPU", cpuQuantity.String(), "container", container.ContainerName)
    77  				requests.Target[v1.ResourceCPU] = *cpuQuantity
    78  			case v1.ResourceMemory:
    79  				memQuantity, err := r.getMemTargetPercentileEstimationWithUsageBuffer(&taskKey, float64(containerConfig.ResourceBufferPercent)/100)
    80  				if err != nil {
    81  					return errortypes.RecommendationNotReadyError(err.Error())
    82  				}
    83  				requests.Target[v1.ResourceMemory] = *memQuantity
    84  			}
    85  		}
    86  		containerRecommendation.Requests = &requests
    87  		recommendation.Recommendations = append(recommendation.Recommendations, containerRecommendation)
    88  	}
    89  	klog.InfoS("recommenders process done", "recommendation", general.StructToString(recommendation.Recommendations))
    90  	return nil
    91  }
    92  
    93  func (r *PercentileRecommender) ScaleOnOOM(oomRecords []oom.OOMRecord, namespace string, workloadName string, containerName string) *resource.Quantity {
    94  	klog.InfoS("scaling on oom for namespace, workload, container", "namespace", namespace, "workload", workloadName, "container", containerName)
    95  	var oomRecord *oom.OOMRecord
    96  	for _, record := range oomRecords {
    97  		// use oomRecord for all pods in workload
    98  		if strings.HasPrefix(record.Pod, workloadName) && containerName == record.Container && namespace == record.Namespace {
    99  			oomRecord = &record
   100  			break
   101  		}
   102  	}
   103  
   104  	// ignore too old oom events
   105  	if oomRecord != nil && time.Since(oomRecord.OOMAt) <= (time.Hour*24*7) {
   106  		memoryOOM := oomRecord.Memory.Value()
   107  		var memoryNeeded vpamodel.ResourceAmount
   108  		memoryNeeded = vpamodel.ResourceAmountMax(vpamodel.ResourceAmount(memoryOOM)+vpamodel.MemoryAmountFromBytes(OOMMinBumpUp),
   109  			vpamodel.ScaleResource(vpamodel.ResourceAmount(memoryOOM), OOMBumpUpRatio))
   110  
   111  		return r.getMemQuantity(float64(memoryNeeded))
   112  	}
   113  
   114  	return nil
   115  }
   116  
   117  func (r *PercentileRecommender) getCpuTargetPercentileEstimationWithUsageBuffer(taskKey *processortypes.ProcessKey, resourceBufferPercentage float64) (quantity *resource.Quantity, err error) {
   118  	klog.InfoS("getting cpu estimation for namespace, workload, container, with resource buffer", "namespace", taskKey.Namespace, "workload", taskKey.WorkloadName, "container", taskKey.ContainerName, "resourceBuffer", resourceBufferPercentage)
   119  	cpuRecommendedValue, err := r.DataProcessor.QueryProcessedValues(taskKey)
   120  	if err != nil {
   121  		return nil, err
   122  	}
   123  	klog.InfoS("got cpu recommended value from processor", "cpuRecommendedValue", cpuRecommendedValue)
   124  	// scale cpu resource based on usageBuffer
   125  	cpuRecommendedValue = cpuRecommendedValue * (1 + resourceBufferPercentage)
   126  	klog.InfoS("scaled cpu recommended value for container", "container", taskKey.ContainerName, "resourceBuffer", resourceBufferPercentage, "cpuRecommendedValue", cpuRecommendedValue)
   127  	cpuQuantity := resource.NewMilliQuantity(int64(cpuRecommendedValue*1000), resource.DecimalSI)
   128  	return cpuQuantity, nil
   129  }
   130  
   131  func (r *PercentileRecommender) getMemTargetPercentileEstimationWithUsageBuffer(taskKey *processortypes.ProcessKey, resourceBufferPercentage float64) (quantity *resource.Quantity, err error) {
   132  	klog.InfoS("getting mem estimation for namespace, workload, container, with resource buffer", "namespace", taskKey.Namespace, "workload", taskKey.WorkloadName, "container", taskKey.ContainerName, "resourceBuffer", resourceBufferPercentage)
   133  	memRecommendedValue, err := r.DataProcessor.QueryProcessedValues(taskKey)
   134  	if err != nil {
   135  		return nil, err
   136  	}
   137  	klog.InfoS("got mem recommended value from processor", "memRecommendedValue", memRecommendedValue)
   138  	// scale mem resource based on usageBuffer
   139  	memRecommendedValue = memRecommendedValue * (1 + resourceBufferPercentage)
   140  	klog.InfoS("scaled mem recommended value for container", "container", taskKey.ContainerName, "resourceBuffer", resourceBufferPercentage, "memRecommendedValue", memRecommendedValue)
   141  	memQuantity := r.getMemQuantity(memRecommendedValue)
   142  	klog.InfoS("got recommended memory for container", "container", taskKey.ContainerName, "memory", memQuantity.String())
   143  	oomRecords := r.OomRecorder.ListOOMRecords()
   144  	oomScaledMem := r.ScaleOnOOM(oomRecords, taskKey.Namespace, taskKey.WorkloadName, taskKey.ContainerName)
   145  	if oomScaledMem != nil && !oomScaledMem.IsZero() && oomScaledMem.Cmp(*memQuantity) > 0 {
   146  		klog.InfoS("container using oomProtect Memory", "container", taskKey.ContainerName, "oomScaledMem", oomScaledMem.String())
   147  		memQuantity = oomScaledMem
   148  	}
   149  	return memQuantity, nil
   150  }
   151  
   152  func (r *PercentileRecommender) getMemQuantity(memRecommendedValue float64) (quantity *resource.Quantity) {
   153  	scale := int64(1)
   154  	quotient := int64(memRecommendedValue)
   155  	remainder := int64(0)
   156  	for scale < 1024*1024 {
   157  		if quotient < 1024 {
   158  			break
   159  		}
   160  		scale *= 1024
   161  		quotient = int64(memRecommendedValue) / scale
   162  		remainder = int64(memRecommendedValue) % scale
   163  	}
   164  	if remainder == 0 {
   165  		return resource.NewQuantity(quotient*scale, resource.BinarySI)
   166  	}
   167  	return resource.NewQuantity((quotient+1)*scale, resource.BinarySI)
   168  }