k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/job_lifecycle_latency.go (about) 1 /* 2 Copyright 2022 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package common 18 19 import ( 20 "context" 21 "fmt" 22 "time" 23 24 batchv1 "k8s.io/api/batch/v1" 25 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 26 "k8s.io/apimachinery/pkg/runtime" 27 "k8s.io/apimachinery/pkg/util/wait" 28 "k8s.io/apimachinery/pkg/watch" 29 clientset "k8s.io/client-go/kubernetes" 30 "k8s.io/client-go/tools/cache" 31 "k8s.io/client-go/util/workqueue" 32 "k8s.io/klog/v2" 33 "k8s.io/perf-tests/clusterloader2/pkg/measurement" 34 measurementutil "k8s.io/perf-tests/clusterloader2/pkg/measurement/util" 35 "k8s.io/perf-tests/clusterloader2/pkg/measurement/util/informer" 36 "k8s.io/perf-tests/clusterloader2/pkg/util" 37 ) 38 39 type eventData struct { 40 obj interface{} 41 recvTime time.Time 42 } 43 44 const ( 45 jobLifecycleLatencyMeasurementName = "JobLifecycleLatency" 46 checkCompletedJobsInterval = time.Second 47 jobCreated = "JobCreated" 48 jobStarted = "JobStarted" 49 jobCompleted = "JobCompleted" 50 ) 51 52 func init() { 53 if err := measurement.Register(jobLifecycleLatencyMeasurementName, createJobLifecycleLatencyMeasurement); err != nil { 54 klog.Fatalf("Can't register service %v", err) 55 } 56 } 57 58 func createJobLifecycleLatencyMeasurement() measurement.Measurement { 59 return &jobLifecycleLatencyMeasurement{ 60 selector: util.NewObjectSelector(), 61 jobStateEntries: measurementutil.NewObjectTransitionTimes(jobLifecycleLatencyMeasurementName), 62 eventQueue: workqueue.New(), 63 } 64 } 65 66 type jobLifecycleLatencyMeasurement struct { 67 selector *util.ObjectSelector 68 isRunning bool 69 stopCh chan struct{} 70 eventQueue *workqueue.Type 71 jobStateEntries *measurementutil.ObjectTransitionTimes 72 } 73 74 // Execute supports two actions: 75 // - start - Starts to observe jobs and their state transitions. 76 // - gather - Gathers and prints job latency data. 77 // heavily influenced by pod_startup_latency measurement 78 func (p *jobLifecycleLatencyMeasurement) Execute(config *measurement.Config) ([]measurement.Summary, error) { 79 action, err := util.GetString(config.Params, "action") 80 if err != nil { 81 return nil, err 82 } 83 switch action { 84 case "start": 85 if err := p.selector.Parse(config.Params); err != nil { 86 return nil, err 87 } 88 return nil, p.start(config.ClusterFramework.GetClientSets().GetClient()) 89 case "gather": 90 timeout, err := util.GetDurationOrDefault(config.Params, "timeout", defaultWaitForFinishedJobsTimeout) 91 if err != nil { 92 return nil, err 93 } 94 return p.gather(config.ClusterFramework.GetClientSets().GetClient(), config.Identifier, timeout) 95 default: 96 return nil, fmt.Errorf("unknown action %v", action) 97 } 98 99 } 100 101 // Dispose cleans up after the measurement. 102 func (p *jobLifecycleLatencyMeasurement) Dispose() { 103 p.stop() 104 } 105 106 // String returns string representation of this measurement. 107 func (p *jobLifecycleLatencyMeasurement) String() string { 108 return jobLifecycleLatencyMeasurementName + ": " + p.selector.String() 109 } 110 111 func (p *jobLifecycleLatencyMeasurement) start(c clientset.Interface) error { 112 if p.isRunning { 113 klog.V(2).Infof("%s: job lifecycle latency measurement already running", p) 114 return nil 115 } 116 klog.V(2).Infof("%s: starting job lifecycle latency measurement...", p) 117 p.isRunning = true 118 p.stopCh = make(chan struct{}) 119 i := informer.NewInformer( 120 &cache.ListWatch{ 121 ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { 122 p.selector.ApplySelectors(&options) 123 return c.BatchV1().Jobs(p.selector.Namespace).List(context.TODO(), options) 124 }, 125 WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { 126 p.selector.ApplySelectors(&options) 127 return c.BatchV1().Jobs(p.selector.Namespace).Watch(context.TODO(), options) 128 }, 129 }, 130 p.addEvent, 131 ) 132 go p.processEvents() 133 return informer.StartAndSync(i, p.stopCh, informerSyncTimeout) 134 } 135 136 func (p *jobLifecycleLatencyMeasurement) addEvent(_, obj interface{}) { 137 event := &eventData{obj: obj, recvTime: time.Now()} 138 p.eventQueue.Add(event) 139 } 140 141 func (p *jobLifecycleLatencyMeasurement) processEvents() { 142 for p.processNextWorkItem() { 143 } 144 } 145 146 func (p *jobLifecycleLatencyMeasurement) processNextWorkItem() bool { 147 item, quit := p.eventQueue.Get() 148 if quit { 149 return false 150 } 151 defer p.eventQueue.Done(item) 152 event, ok := item.(*eventData) 153 if !ok { 154 klog.Warningf("Couldn't convert work item to evetData: %v", item) 155 return true 156 } 157 p.processEvent(event) 158 return true 159 } 160 161 // processEvent processes job state change events: 162 // uses Phase Latency utility to record job state transitions 163 // it currently captures the following transitions: 164 // JobCreated (job.CreationTimestamp.Time) -> JobStarted (job.Status.StartTime.Time) 165 // JobStarted (job.Status.StartTime.Time) -> JobCompleted (job.Status.CompletionTime.Time) 166 func (p *jobLifecycleLatencyMeasurement) processEvent(event *eventData) { 167 obj := event.obj 168 if obj == nil { 169 return 170 } 171 job, ok := obj.(*batchv1.Job) 172 if !ok { 173 return 174 } 175 key := createMetaNamespaceKey(job.Namespace, job.Name) 176 if _, found := p.jobStateEntries.Get(key, jobCreated); !found { 177 p.jobStateEntries.Set(key, jobCreated, job.CreationTimestamp.Time) 178 } 179 if job.Status.StartTime != nil { 180 if _, found := p.jobStateEntries.Get(key, jobStarted); !found { 181 p.jobStateEntries.Set(key, jobStarted, job.Status.StartTime.Time) 182 } 183 } 184 if job.Status.CompletionTime != nil { 185 if _, found := p.jobStateEntries.Get(key, jobCompleted); !found { 186 p.jobStateEntries.Set(key, jobCompleted, job.Status.CompletionTime.Time) 187 } 188 } 189 } 190 191 func (p *jobLifecycleLatencyMeasurement) stop() { 192 if p.isRunning { 193 p.isRunning = false 194 close(p.stopCh) 195 p.eventQueue.ShutDown() 196 } 197 } 198 199 var jobLifecycleTransitions = map[string]measurementutil.Transition{ 200 "create_to_start": { 201 From: jobCreated, 202 To: jobStarted, 203 }, 204 "start_to_complete": { 205 From: jobStarted, 206 To: jobCompleted, 207 }, 208 } 209 210 // gather collects job lifecycle latency and calculates percentiles using Phase Latency utility 211 // it waits for all jobs to be completed before collecting the metrics or times out 212 func (p *jobLifecycleLatencyMeasurement) gather(c clientset.Interface, identifier string, timeout time.Duration) ([]measurement.Summary, error) { 213 klog.V(2).Infof("%s: gathering job lifecycle latency measurement...", p) 214 if !p.isRunning { 215 return nil, fmt.Errorf("metric %s has not been started", jobLifecycleLatencyMeasurementName) 216 } 217 condition := func() (bool, error) { 218 return p.jobStateEntries.Count(jobCreated) == p.jobStateEntries.Count(jobCompleted), nil 219 } 220 if err := wait.Poll(checkCompletedJobsInterval, timeout, condition); err != nil { 221 klog.V(2).Infof("Timed out waiting for all jobs to complete: %v", err) 222 } 223 p.stop() 224 jobLifecycleLatency := p.jobStateEntries.CalculateTransitionsLatency(jobLifecycleTransitions, measurementutil.MatchAll) 225 content, jsonErr := util.PrettyPrintJSON(measurementutil.LatencyMapToPerfData(jobLifecycleLatency)) 226 if jsonErr != nil { 227 return nil, jsonErr 228 } 229 summaryName := fmt.Sprintf("%s_%s", jobLifecycleLatencyMeasurementName, identifier) 230 summaries := []measurement.Summary{measurement.CreateSummary(summaryName, "json", content)} 231 return summaries, nil 232 } 233 234 func createMetaNamespaceKey(namespace, name string) string { 235 return namespace + "/" + name 236 }