k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/scheduling_throughput.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package common 18 19 import ( 20 "fmt" 21 "math" 22 "sort" 23 "time" 24 25 clientset "k8s.io/client-go/kubernetes" 26 "k8s.io/klog/v2" 27 "k8s.io/perf-tests/clusterloader2/pkg/errors" 28 "k8s.io/perf-tests/clusterloader2/pkg/measurement" 29 measurementutil "k8s.io/perf-tests/clusterloader2/pkg/measurement/util" 30 "k8s.io/perf-tests/clusterloader2/pkg/util" 31 ) 32 33 const ( 34 schedulingThroughputMeasurementName = "SchedulingThroughput" 35 defaultSchedulingThroughputInterval = 5 * time.Second 36 ) 37 38 func init() { 39 if err := measurement.Register(schedulingThroughputMeasurementName, createSchedulingThroughputMeasurement); err != nil { 40 klog.Fatalf("Cannot register %s: %v", schedulingThroughputMeasurementName, err) 41 } 42 } 43 44 func createSchedulingThroughputMeasurement() measurement.Measurement { 45 return &schedulingThroughputMeasurement{} 46 } 47 48 type schedulingThroughputMeasurement struct { 49 schedulingThroughputs []float64 50 isRunning bool 51 stopCh chan struct{} 52 } 53 54 // Execute supports two actions: 55 // - start - starts the pods scheduling observation. 56 // Pods can be specified by field and/or label selectors. 57 // If namespace is not passed by parameter, all-namespace scope is assumed. 58 // - gather - creates summary for observed values. 59 func (s *schedulingThroughputMeasurement) Execute(config *measurement.Config) ([]measurement.Summary, error) { 60 action, err := util.GetString(config.Params, "action") 61 if err != nil { 62 return nil, err 63 } 64 switch action { 65 case "start": 66 if s.isRunning { 67 klog.V(3).Infof("%s: measurement already running", s) 68 return nil, nil 69 } 70 selector := util.NewObjectSelector() 71 if err := selector.Parse(config.Params); err != nil { 72 return nil, err 73 } 74 measurmentInterval, err := util.GetDurationOrDefault(config.Params, "measurmentInterval", defaultSchedulingThroughputInterval) 75 if err != nil { 76 return nil, err 77 } 78 s.stopCh = make(chan struct{}) 79 return nil, s.start(config.ClusterFramework.GetClientSets().GetClient(), selector, measurmentInterval) 80 case "gather": 81 threshold, err := util.GetFloat64OrDefault(config.Params, "threshold", 0) 82 if err != nil { 83 klog.Warningf("error while getting threshold param: %v", err) 84 } 85 enableViolations, err := util.GetBoolOrDefault(config.Params, "enableViolations", true) 86 if err != nil { 87 klog.Warningf("error while getting enableViolations param: %v", err) 88 } 89 summary, err := s.gather(threshold) 90 if err != nil { 91 if !errors.IsMetricViolationError(err) { 92 klog.Errorf("%s gathering error: %v", config.Identifier, err) 93 return nil, err 94 } 95 if !enableViolations { 96 err = nil 97 } 98 } 99 return summary, err 100 default: 101 return nil, fmt.Errorf("unknown action %v", action) 102 } 103 } 104 105 // Dispose cleans up after the measurement. 106 func (s *schedulingThroughputMeasurement) Dispose() { 107 s.stop() 108 } 109 110 // String returns a string representation of the measurement. 111 func (*schedulingThroughputMeasurement) String() string { 112 return schedulingThroughputMeasurementName 113 } 114 115 func (s *schedulingThroughputMeasurement) start(clientSet clientset.Interface, selector *util.ObjectSelector, measurmentInterval time.Duration) error { 116 ps, err := measurementutil.NewPodStore(clientSet, selector) 117 if err != nil { 118 return fmt.Errorf("pod store creation error: %v", err) 119 } 120 s.isRunning = true 121 klog.V(2).Infof("%s: starting collecting throughput data", s) 122 123 go func() { 124 defer ps.Stop() 125 lastScheduledCount := 0 126 for { 127 select { 128 case <-s.stopCh: 129 return 130 case <-time.After(measurmentInterval): 131 pods, err := ps.List() 132 if err != nil { 133 // List in NewPodStore never returns error. 134 // TODO(mborsz): Even if this is a case now, it doesn't need to be true in future. Refactor this. 135 panic(fmt.Errorf("unexpected error on PodStore.List: %w", err)) 136 } 137 podsStatus := measurementutil.ComputePodsStartupStatus(pods, 0, nil /* updatePodPredicate */) 138 throughput := float64(podsStatus.Scheduled-lastScheduledCount) / float64(measurmentInterval/time.Second) 139 s.schedulingThroughputs = append(s.schedulingThroughputs, throughput) 140 lastScheduledCount = podsStatus.Scheduled 141 klog.V(3).Infof("%v: %s: %d pods scheduled", s, selector.String(), lastScheduledCount) 142 } 143 } 144 }() 145 return nil 146 } 147 148 func (s *schedulingThroughputMeasurement) gather(threshold float64) ([]measurement.Summary, error) { 149 if !s.isRunning { 150 klog.Errorf("%s: measurement is not running", s) 151 return nil, fmt.Errorf("measurement is not running") 152 } 153 s.stop() 154 klog.V(2).Infof("%s: gathering data", s) 155 156 throughputSummary := &schedulingThroughput{} 157 if length := len(s.schedulingThroughputs); length > 0 { 158 sort.Float64s(s.schedulingThroughputs) 159 throughputSummary.Perc50 = s.schedulingThroughputs[int(math.Ceil(float64(length*50)/100))-1] 160 throughputSummary.Perc90 = s.schedulingThroughputs[int(math.Ceil(float64(length*90)/100))-1] 161 throughputSummary.Perc99 = s.schedulingThroughputs[int(math.Ceil(float64(length*99)/100))-1] 162 throughputSummary.Max = s.schedulingThroughputs[length-1] 163 } 164 content, err := util.PrettyPrintJSON(throughputSummary) 165 if err != nil { 166 return nil, err 167 } 168 summary := measurement.CreateSummary(schedulingThroughputMeasurementName, "json", content) 169 if threshold > 0 && throughputSummary.Max < threshold { 170 err = errors.NewMetricViolationError( 171 "scheduler throughput", 172 fmt.Sprintf("actual throughput %f lower than threshold %f", throughputSummary.Max, threshold)) 173 } 174 return []measurement.Summary{summary}, err 175 } 176 177 func (s *schedulingThroughputMeasurement) stop() { 178 if s.isRunning { 179 close(s.stopCh) 180 s.isRunning = false 181 } 182 } 183 184 type schedulingThroughput struct { 185 Perc50 float64 `json:"perc50"` 186 Perc90 float64 `json:"perc90"` 187 Perc99 float64 `json:"perc99"` 188 Max float64 `json:"max"` 189 }