k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/resource_usage.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package common 18 19 import ( 20 "fmt" 21 "math" 22 "strings" 23 "time" 24 25 "k8s.io/klog/v2" 26 "k8s.io/perf-tests/clusterloader2/pkg/errors" 27 "k8s.io/perf-tests/clusterloader2/pkg/measurement" 28 measurementutil "k8s.io/perf-tests/clusterloader2/pkg/measurement/util" 29 "k8s.io/perf-tests/clusterloader2/pkg/measurement/util/gatherers" 30 "k8s.io/perf-tests/clusterloader2/pkg/util" 31 ) 32 33 const ( 34 resourceUsageMetricName = "ResourceUsageSummary" 35 // maxNodeCountForAllNodes defines the threshold for cluster size above which 36 // we no longer gather resource usage from all system components on all nodes. 37 maxNodeCountForAllNodes = 1000 38 ) 39 40 func init() { 41 if err := measurement.Register(resourceUsageMetricName, createResourceUsageMetricMeasurement); err != nil { 42 klog.Fatalf("Cannot register %s: %v", resourceUsageMetricName, err) 43 } 44 } 45 46 func createResourceUsageMetricMeasurement() measurement.Measurement { 47 return &resourceUsageMetricMeasurement{ 48 resourceConstraints: make(map[string]*measurementutil.ResourceConstraint), 49 } 50 } 51 52 type resourceUsageMetricMeasurement struct { 53 gatherer *gatherers.ContainerResourceGatherer 54 resourceConstraints map[string]*measurementutil.ResourceConstraint 55 } 56 57 // Execute supports two actions: 58 // - start - Starts resource metrics collecting. 59 // - gather - Gathers and prints current resource usage metrics. 60 func (e *resourceUsageMetricMeasurement) Execute(config *measurement.Config) ([]measurement.Summary, error) { 61 provider := config.ClusterFramework.GetClusterConfig().Provider 62 if !provider.Features().SupportResourceUsageMetering { 63 klog.Warningf("fetching resource usage metrics is not possible for provider %q", provider.Name()) 64 return nil, nil 65 } 66 67 action, err := util.GetString(config.Params, "action") 68 if err != nil { 69 return nil, err 70 } 71 72 switch action { 73 case "start": 74 provider := config.ClusterFramework.GetClusterConfig().Provider 75 host, err := util.GetStringOrDefault(config.Params, "host", config.ClusterFramework.GetClusterConfig().GetMasterIP()) 76 if err != nil { 77 return nil, err 78 } 79 namespace, err := util.GetStringOrDefault(config.Params, "namespace", "kube-system") 80 if err != nil { 81 return nil, err 82 } 83 constraintsPath, err := util.GetStringOrDefault(config.Params, "resourceConstraints", "") 84 if err != nil { 85 return nil, err 86 } 87 if constraintsPath != "" { 88 mapping := make(map[string]interface{}) 89 mapping["Nodes"] = config.ClusterFramework.GetClusterConfig().Nodes 90 if err = config.TemplateProvider.TemplateInto(constraintsPath, mapping, &e.resourceConstraints); err != nil { 91 return nil, fmt.Errorf("resource constraints reading error: %v", err) 92 } 93 for _, constraint := range e.resourceConstraints { 94 if constraint.CPUConstraint == 0 { 95 constraint.CPUConstraint = math.MaxFloat64 96 } 97 if constraint.MemoryConstraint == 0 { 98 constraint.MemoryConstraint = math.MaxUint64 99 } 100 } 101 } 102 103 // Compute the node based on the cluster size. 104 nodeCount := config.ClusterFramework.GetClusterConfig().Nodes 105 nodesSet := gatherers.AllNodes 106 if nodeCount > maxNodeCountForAllNodes { 107 nodesSet = gatherers.MasterAndNonDaemons 108 } 109 110 klog.V(2).Infof("%s: starting resource usage collecting (mode %#v)...", e, nodesSet) 111 e.gatherer, err = gatherers.NewResourceUsageGatherer(config.ClusterFramework.GetClientSets().GetClient(), host, config.ClusterFramework.GetClusterConfig().KubeletPort, 112 provider, gatherers.ResourceGathererOptions{ 113 InKubemark: provider.Features().IsKubemarkProvider, 114 Nodes: nodesSet, 115 ResourceDataGatheringPeriod: 60 * time.Second, 116 MasterResourceDataGatheringPeriod: 10 * time.Second, 117 }, namespace) 118 if err != nil { 119 return nil, err 120 } 121 go e.gatherer.StartGatheringData() 122 return nil, nil 123 case "gather": 124 if e.gatherer == nil { 125 klog.Errorf("%s: gatherer not initialized", e) 126 return nil, nil 127 } 128 klog.V(2).Infof("%s: gathering resource usage...", e) 129 summary, err := e.gatherer.StopAndSummarize([]int{50, 90, 99, 100}) 130 if err != nil { 131 return nil, err 132 } 133 content, err := util.PrettyPrintJSON(summary) 134 if err != nil { 135 return nil, err 136 } 137 resourceSummary := measurement.CreateSummary(resourceUsageMetricName, "json", content) 138 return []measurement.Summary{resourceSummary}, e.verifySummary(summary) 139 140 default: 141 return nil, fmt.Errorf("unknown action %v", action) 142 } 143 } 144 145 // Dispose cleans up after the measurement. 146 func (e *resourceUsageMetricMeasurement) Dispose() { 147 if e.gatherer != nil { 148 e.gatherer.Dispose() 149 } 150 } 151 152 // String returns string representation of this measurement. 153 func (*resourceUsageMetricMeasurement) String() string { 154 return resourceUsageMetricName 155 } 156 157 func (e *resourceUsageMetricMeasurement) verifySummary(summary *gatherers.ResourceUsageSummary) error { 158 violatedConstraints := make([]string, 0) 159 for _, containerSummary := range summary.Get("99") { 160 containerName := strings.Split(containerSummary.Name, "/")[1] 161 if constraint, ok := e.resourceConstraints[containerName]; ok { 162 if containerSummary.CPU > constraint.CPUConstraint { 163 violatedConstraints = append( 164 violatedConstraints, 165 fmt.Sprintf("container %v is using %v/%v CPU", 166 containerSummary.Name, 167 containerSummary.CPU, 168 constraint.CPUConstraint, 169 ), 170 ) 171 } 172 if containerSummary.Mem > constraint.MemoryConstraint { 173 violatedConstraints = append( 174 violatedConstraints, 175 fmt.Sprintf("container %v is using %v/%v MB of memory", 176 containerSummary.Name, 177 float64(containerSummary.Mem)/(1024*1024), 178 float64(constraint.MemoryConstraint)/(1024*1024), 179 ), 180 ) 181 } 182 } 183 } 184 if len(violatedConstraints) > 0 { 185 for i := range violatedConstraints { 186 klog.Errorf("%s: violation: %s", e, violatedConstraints[i]) 187 } 188 return errors.NewMetricViolationError("resource constraints", fmt.Sprintf("%d constraints violated: %v", len(violatedConstraints), violatedConstraints)) 189 } 190 return nil 191 }