k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/resource_usage.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package common
    18  
    19  import (
    20  	"fmt"
    21  	"math"
    22  	"strings"
    23  	"time"
    24  
    25  	"k8s.io/klog/v2"
    26  	"k8s.io/perf-tests/clusterloader2/pkg/errors"
    27  	"k8s.io/perf-tests/clusterloader2/pkg/measurement"
    28  	measurementutil "k8s.io/perf-tests/clusterloader2/pkg/measurement/util"
    29  	"k8s.io/perf-tests/clusterloader2/pkg/measurement/util/gatherers"
    30  	"k8s.io/perf-tests/clusterloader2/pkg/util"
    31  )
    32  
    33  const (
    34  	resourceUsageMetricName = "ResourceUsageSummary"
    35  	// maxNodeCountForAllNodes defines the threshold for cluster size above which
    36  	// we no longer gather resource usage from all system components on all nodes.
    37  	maxNodeCountForAllNodes = 1000
    38  )
    39  
    40  func init() {
    41  	if err := measurement.Register(resourceUsageMetricName, createResourceUsageMetricMeasurement); err != nil {
    42  		klog.Fatalf("Cannot register %s: %v", resourceUsageMetricName, err)
    43  	}
    44  }
    45  
    46  func createResourceUsageMetricMeasurement() measurement.Measurement {
    47  	return &resourceUsageMetricMeasurement{
    48  		resourceConstraints: make(map[string]*measurementutil.ResourceConstraint),
    49  	}
    50  }
    51  
    52  type resourceUsageMetricMeasurement struct {
    53  	gatherer            *gatherers.ContainerResourceGatherer
    54  	resourceConstraints map[string]*measurementutil.ResourceConstraint
    55  }
    56  
    57  // Execute supports two actions:
    58  // - start - Starts resource metrics collecting.
    59  // - gather - Gathers and prints current resource usage metrics.
    60  func (e *resourceUsageMetricMeasurement) Execute(config *measurement.Config) ([]measurement.Summary, error) {
    61  	provider := config.ClusterFramework.GetClusterConfig().Provider
    62  	if !provider.Features().SupportResourceUsageMetering {
    63  		klog.Warningf("fetching resource usage metrics is not possible for provider %q", provider.Name())
    64  		return nil, nil
    65  	}
    66  
    67  	action, err := util.GetString(config.Params, "action")
    68  	if err != nil {
    69  		return nil, err
    70  	}
    71  
    72  	switch action {
    73  	case "start":
    74  		provider := config.ClusterFramework.GetClusterConfig().Provider
    75  		host, err := util.GetStringOrDefault(config.Params, "host", config.ClusterFramework.GetClusterConfig().GetMasterIP())
    76  		if err != nil {
    77  			return nil, err
    78  		}
    79  		namespace, err := util.GetStringOrDefault(config.Params, "namespace", "kube-system")
    80  		if err != nil {
    81  			return nil, err
    82  		}
    83  		constraintsPath, err := util.GetStringOrDefault(config.Params, "resourceConstraints", "")
    84  		if err != nil {
    85  			return nil, err
    86  		}
    87  		if constraintsPath != "" {
    88  			mapping := make(map[string]interface{})
    89  			mapping["Nodes"] = config.ClusterFramework.GetClusterConfig().Nodes
    90  			if err = config.TemplateProvider.TemplateInto(constraintsPath, mapping, &e.resourceConstraints); err != nil {
    91  				return nil, fmt.Errorf("resource constraints reading error: %v", err)
    92  			}
    93  			for _, constraint := range e.resourceConstraints {
    94  				if constraint.CPUConstraint == 0 {
    95  					constraint.CPUConstraint = math.MaxFloat64
    96  				}
    97  				if constraint.MemoryConstraint == 0 {
    98  					constraint.MemoryConstraint = math.MaxUint64
    99  				}
   100  			}
   101  		}
   102  
   103  		// Compute the node based on the cluster size.
   104  		nodeCount := config.ClusterFramework.GetClusterConfig().Nodes
   105  		nodesSet := gatherers.AllNodes
   106  		if nodeCount > maxNodeCountForAllNodes {
   107  			nodesSet = gatherers.MasterAndNonDaemons
   108  		}
   109  
   110  		klog.V(2).Infof("%s: starting resource usage collecting (mode %#v)...", e, nodesSet)
   111  		e.gatherer, err = gatherers.NewResourceUsageGatherer(config.ClusterFramework.GetClientSets().GetClient(), host, config.ClusterFramework.GetClusterConfig().KubeletPort,
   112  			provider, gatherers.ResourceGathererOptions{
   113  				InKubemark:                        provider.Features().IsKubemarkProvider,
   114  				Nodes:                             nodesSet,
   115  				ResourceDataGatheringPeriod:       60 * time.Second,
   116  				MasterResourceDataGatheringPeriod: 10 * time.Second,
   117  			}, namespace)
   118  		if err != nil {
   119  			return nil, err
   120  		}
   121  		go e.gatherer.StartGatheringData()
   122  		return nil, nil
   123  	case "gather":
   124  		if e.gatherer == nil {
   125  			klog.Errorf("%s: gatherer not initialized", e)
   126  			return nil, nil
   127  		}
   128  		klog.V(2).Infof("%s: gathering resource usage...", e)
   129  		summary, err := e.gatherer.StopAndSummarize([]int{50, 90, 99, 100})
   130  		if err != nil {
   131  			return nil, err
   132  		}
   133  		content, err := util.PrettyPrintJSON(summary)
   134  		if err != nil {
   135  			return nil, err
   136  		}
   137  		resourceSummary := measurement.CreateSummary(resourceUsageMetricName, "json", content)
   138  		return []measurement.Summary{resourceSummary}, e.verifySummary(summary)
   139  
   140  	default:
   141  		return nil, fmt.Errorf("unknown action %v", action)
   142  	}
   143  }
   144  
   145  // Dispose cleans up after the measurement.
   146  func (e *resourceUsageMetricMeasurement) Dispose() {
   147  	if e.gatherer != nil {
   148  		e.gatherer.Dispose()
   149  	}
   150  }
   151  
   152  // String returns string representation of this measurement.
   153  func (*resourceUsageMetricMeasurement) String() string {
   154  	return resourceUsageMetricName
   155  }
   156  
   157  func (e *resourceUsageMetricMeasurement) verifySummary(summary *gatherers.ResourceUsageSummary) error {
   158  	violatedConstraints := make([]string, 0)
   159  	for _, containerSummary := range summary.Get("99") {
   160  		containerName := strings.Split(containerSummary.Name, "/")[1]
   161  		if constraint, ok := e.resourceConstraints[containerName]; ok {
   162  			if containerSummary.CPU > constraint.CPUConstraint {
   163  				violatedConstraints = append(
   164  					violatedConstraints,
   165  					fmt.Sprintf("container %v is using %v/%v CPU",
   166  						containerSummary.Name,
   167  						containerSummary.CPU,
   168  						constraint.CPUConstraint,
   169  					),
   170  				)
   171  			}
   172  			if containerSummary.Mem > constraint.MemoryConstraint {
   173  				violatedConstraints = append(
   174  					violatedConstraints,
   175  					fmt.Sprintf("container %v is using %v/%v MB of memory",
   176  						containerSummary.Name,
   177  						float64(containerSummary.Mem)/(1024*1024),
   178  						float64(constraint.MemoryConstraint)/(1024*1024),
   179  					),
   180  				)
   181  			}
   182  		}
   183  	}
   184  	if len(violatedConstraints) > 0 {
   185  		for i := range violatedConstraints {
   186  			klog.Errorf("%s: violation: %s", e, violatedConstraints[i])
   187  		}
   188  		return errors.NewMetricViolationError("resource constraints", fmt.Sprintf("%d constraints violated: %v", len(violatedConstraints), violatedConstraints))
   189  	}
   190  	return nil
   191  }