github.com/gocrane/crane@v0.11.0/pkg/recommendation/recommender/resource/recommend.go

github.com/gocrane/crane@v0.11.0/pkg/recommendation/recommender/resource/recommend.go (about)

     1  package resource
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"reflect"
     7  	"strings"
     8  	"time"
     9  
    10  	corev1 "k8s.io/api/core/v1"
    11  	"k8s.io/apimachinery/pkg/api/resource"
    12  	recommendermodel "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model"
    13  	"k8s.io/klog/v2"
    14  	"sigs.k8s.io/yaml"
    15  
    16  	predictionapi "github.com/gocrane/api/prediction/v1alpha1"
    17  
    18  	"github.com/gocrane/crane/pkg/metricnaming"
    19  	"github.com/gocrane/crane/pkg/oom"
    20  	"github.com/gocrane/crane/pkg/prediction/config"
    21  	"github.com/gocrane/crane/pkg/recommend/types"
    22  	"github.com/gocrane/crane/pkg/recommendation/framework"
    23  	"github.com/gocrane/crane/pkg/utils"
    24  )
    25  
// callerFormat is the fmt template for the caller identifier handed to the
// metric namer and the predictor query. Its two %s verbs are filled with the
// recommendation's object reference (klog.KObj) and its UID.
const callerFormat = "ResourceRecommendationCaller-%s-%s"
    27  
    28  type PatchResource struct {
    29  	Spec PatchResourceSpec `json:"spec,omitempty"`
    30  }
    31  
    32  type PatchResourceSpec struct {
    33  	Template PatchResourcePodTemplateSpec `json:"template"`
    34  }
    35  
    36  type PatchResourcePodTemplateSpec struct {
    37  	Spec PatchResourcePodSpec `json:"spec,omitempty"`
    38  }
    39  
    40  type PatchResourcePodSpec struct {
    41  	// +patchMergeKey=name
    42  	// +patchStrategy=merge
    43  	Containers []corev1.Container `json:"containers" patchStrategy:"merge" patchMergeKey:"name"`
    44  }
    45  
// PreRecommend is the hook that runs before Recommend. The resource
// recommender has nothing to prepare, so it ignores ctx and always returns nil.
func (rr *ResourceRecommender) PreRecommend(ctx *framework.RecommendationContext) error {
	return nil
}
    49  
    50  func (rr *ResourceRecommender) makeCpuConfig() *config.Config {
    51  	return &config.Config{
    52  		Percentile: &predictionapi.Percentile{
    53  			Aggregated:        true,
    54  			HistoryLength:     rr.CpuModelHistoryLength,
    55  			SampleInterval:    rr.CpuSampleInterval,
    56  			MarginFraction:    rr.CpuRequestMarginFraction,
    57  			TargetUtilization: rr.CpuTargetUtilization,
    58  			Percentile:        rr.CpuRequestPercentile,
    59  			Histogram: predictionapi.HistogramConfig{
    60  				HalfLife:   "24h",
    61  				BucketSize: rr.CpuHistogramBucketSize,
    62  				MaxValue:   rr.CpuHistogramMaxValue,
    63  			},
    64  		},
    65  	}
    66  }
    67  
    68  func (rr *ResourceRecommender) makeMemConfig() *config.Config {
    69  	return &config.Config{
    70  		Percentile: &predictionapi.Percentile{
    71  			Aggregated:        true,
    72  			HistoryLength:     rr.MemHistoryLength,
    73  			SampleInterval:    rr.MemSampleInterval,
    74  			MarginFraction:    rr.MemMarginFraction,
    75  			Percentile:        rr.MemPercentile,
    76  			TargetUtilization: rr.MemTargetUtilization,
    77  			Histogram: predictionapi.HistogramConfig{
    78  				HalfLife:   "48h",
    79  				BucketSize: rr.MemHistogramBucketSize,
    80  				MaxValue:   rr.MemHistogramMaxValue,
    81  			},
    82  		},
    83  	}
    84  }
    85  
    86  func (rr *ResourceRecommender) Recommend(ctx *framework.RecommendationContext) error {
    87  	predictor := ctx.PredictorMgr.GetPredictor(predictionapi.AlgorithmTypePercentile)
    88  	if predictor == nil {
    89  		return fmt.Errorf("predictor %v not found", predictionapi.AlgorithmTypePercentile)
    90  	}
    91  
    92  	resourceRecommendation := &types.ResourceRequestRecommendation{}
    93  
    94  	var newContainers []corev1.Container
    95  	var oldContainers []corev1.Container
    96  
    97  	oomRecords, err := ctx.OOMRecorder.GetOOMRecord()
    98  	if err != nil {
    99  		return err
   100  	}
   101  
   102  	namespace := ctx.Object.GetNamespace()
   103  	for _, c := range ctx.Pods[0].Spec.Containers {
   104  		cr := types.ContainerRecommendation{
   105  			ContainerName: c.Name,
   106  			Target:        map[corev1.ResourceName]string{},
   107  		}
   108  
   109  		caller := fmt.Sprintf(callerFormat, klog.KObj(ctx.Recommendation), ctx.Recommendation.UID)
   110  		metricNamer := metricnaming.ResourceToContainerMetricNamer(namespace, ctx.Recommendation.Spec.TargetRef.APIVersion,
   111  			ctx.Recommendation.Spec.TargetRef.Kind, ctx.Recommendation.Spec.TargetRef.Name, c.Name, corev1.ResourceCPU, caller)
   112  		klog.Infof("%s: CPU query for resource request recommendation: %s", ctx.String(), metricNamer.BuildUniqueKey())
   113  		cpuConfig := rr.makeCpuConfig()
   114  		tsList, err := utils.QueryPredictedValuesOnce(ctx.Recommendation, predictor, caller, cpuConfig, metricNamer)
   115  		if err != nil {
   116  			return err
   117  		}
   118  		if len(tsList) < 1 || len(tsList[0].Samples) < 1 {
   119  			return fmt.Errorf("no value retured for queryExpr: %s", metricNamer.BuildUniqueKey())
   120  		}
   121  		v := int64(tsList[0].Samples[0].Value * 1000)
   122  		cpuQuantity := resource.NewMilliQuantity(v, resource.DecimalSI)
   123  		klog.Infof("%s: container %s recommended cpu %s", ctx.String(), c.Name, cpuQuantity.String())
   124  
   125  		metricNamer = metricnaming.ResourceToContainerMetricNamer(namespace, ctx.Recommendation.Spec.TargetRef.APIVersion,
   126  			ctx.Recommendation.Spec.TargetRef.Kind, ctx.Recommendation.Spec.TargetRef.Name, c.Name, corev1.ResourceMemory, caller)
   127  		klog.Infof("%s Memory query for resource request recommendation: %s", ctx.String(), metricNamer.BuildUniqueKey())
   128  		memConfig := rr.makeMemConfig()
   129  		tsList, err = utils.QueryPredictedValuesOnce(ctx.Recommendation, predictor, caller, memConfig, metricNamer)
   130  		if err != nil {
   131  			return err
   132  		}
   133  		if len(tsList) < 1 || len(tsList[0].Samples) < 1 {
   134  			return fmt.Errorf("no value retured for queryExpr: %s", metricNamer.BuildUniqueKey())
   135  		}
   136  		v = int64(tsList[0].Samples[0].Value)
   137  		if v <= 0 {
   138  			return fmt.Errorf("no enough metrics")
   139  		}
   140  		memQuantity := resource.NewQuantity(v, resource.BinarySI)
   141  		klog.Infof("%s: container %s recommended memory %s", ctx.String(), c.Name, memQuantity.String())
   142  
   143  		// Use oom protected memory if exist
   144  		if rr.OOMProtection {
   145  			oomProtectMem := rr.MemoryOOMProtection(oomRecords, namespace, ctx.Object.GetName(), c.Name)
   146  			if oomProtectMem != nil && !oomProtectMem.IsZero() && oomProtectMem.Cmp(*memQuantity) > 0 {
   147  				klog.Infof("%s: container %s using oomProtect Memory %s", ctx.String(), c.Name, oomProtectMem.String())
   148  				memQuantity = oomProtectMem
   149  			}
   150  		}
   151  
   152  		// Resource Specification enabled
   153  		if rr.Specification {
   154  			normalizedCpu, normalizedMem := GetNormalizedResource(cpuQuantity, memQuantity, rr.SpecificationConfigs)
   155  			klog.Infof("GetNormalizedResource currentCpu %s normalizedCpu %s currentMem %s normalizedMem %s", cpuQuantity.String(), normalizedCpu.String(), memQuantity.String(), normalizedMem.String())
   156  			if normalizedCpu.Value() > 0 && normalizedMem.Value() > 0 {
   157  				cpuQuantity = &normalizedCpu
   158  				memQuantity = &normalizedMem
   159  			}
   160  		}
   161  
   162  		cr.Target[corev1.ResourceCPU] = cpuQuantity.String()
   163  		cr.Target[corev1.ResourceMemory] = memQuantity.String()
   164  
   165  		newContainerSpec := corev1.Container{
   166  			Name: c.Name,
   167  			Resources: corev1.ResourceRequirements{
   168  				Requests: corev1.ResourceList{
   169  					corev1.ResourceCPU:    *cpuQuantity,
   170  					corev1.ResourceMemory: *memQuantity,
   171  				},
   172  			},
   173  		}
   174  
   175  		oldContainerSpec := corev1.Container{
   176  			Name: c.Name,
   177  			Resources: corev1.ResourceRequirements{
   178  				Requests: corev1.ResourceList{
   179  					corev1.ResourceCPU:    c.Resources.Requests[corev1.ResourceCPU],
   180  					corev1.ResourceMemory: c.Resources.Requests[corev1.ResourceMemory],
   181  				},
   182  			},
   183  		}
   184  
   185  		newContainers = append(newContainers, newContainerSpec)
   186  		oldContainers = append(oldContainers, oldContainerSpec)
   187  
   188  		resourceRecommendation.Containers = append(resourceRecommendation.Containers, cr)
   189  	}
   190  
   191  	value := types.ProposedRecommendation{
   192  		ResourceRequest: resourceRecommendation,
   193  	}
   194  
   195  	valueBytes, err := yaml.Marshal(value)
   196  	if err != nil {
   197  		return fmt.Errorf("%s yaml marshal failed: %v", rr.Name(), err)
   198  	}
   199  
   200  	ctx.Recommendation.Status.RecommendedValue = string(valueBytes)
   201  
   202  	var newPatch PatchResource
   203  	newPatch.Spec.Template.Spec.Containers = newContainers
   204  	newPatchBytes, err := json.Marshal(newPatch)
   205  	if err != nil {
   206  		return fmt.Errorf("marshal newPatch failed %s. ", err)
   207  	}
   208  
   209  	var oldPatch PatchResource
   210  	oldPatch.Spec.Template.Spec.Containers = oldContainers
   211  	oldPatchBytes, err := json.Marshal(oldPatch)
   212  	if err != nil {
   213  		return fmt.Errorf("marshal oldPatch failed %s. ", err)
   214  	}
   215  
   216  	if reflect.DeepEqual(&newPatch, &oldPatch) {
   217  		ctx.Recommendation.Status.Action = "None"
   218  	} else {
   219  		ctx.Recommendation.Status.Action = "Patch"
   220  	}
   221  
   222  	ctx.Recommendation.Status.RecommendedInfo = string(newPatchBytes)
   223  	ctx.Recommendation.Status.CurrentInfo = string(oldPatchBytes)
   224  
   225  	return nil
   226  }
   227  
// Policy is the hook for post-processing the result of the recommend phase.
// The resource recommender applies no extra policy: it ignores ctx and always
// returns nil.
func (rr *ResourceRecommender) Policy(ctx *framework.RecommendationContext) error {
	return nil
}
   232  
   233  func (rr *ResourceRecommender) MemoryOOMProtection(oomRecords []oom.OOMRecord, namespace string, workloadName string, containerName string) *resource.Quantity {
   234  	var oomRecord *oom.OOMRecord
   235  	for _, record := range oomRecords {
   236  		// use oomRecord for all pods in workload
   237  		if strings.HasPrefix(record.Pod, workloadName) && containerName == record.Container && namespace == record.Namespace {
   238  			oomRecord = &record
   239  			break
   240  		}
   241  	}
   242  
   243  	// ignore too old oom events
   244  	if oomRecord != nil && time.Since(oomRecord.OOMAt) <= (time.Hour*24*7) {
   245  		memoryOOM := oomRecord.Memory.Value()
   246  		var memoryNeeded recommendermodel.ResourceAmount
   247  
   248  		memoryNeeded = recommendermodel.ResourceAmountMax(recommendermodel.ResourceAmount(memoryOOM)+recommendermodel.MemoryAmountFromBytes(recommendermodel.OOMMinBumpUp),
   249  			recommendermodel.ScaleResource(recommendermodel.ResourceAmount(memoryOOM), rr.OOMBumpRatio))
   250  
   251  		return resource.NewQuantity(int64(memoryNeeded), resource.BinarySI)
   252  	}
   253  
   254  	return nil
   255  }