k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/service_creation_latency.go (about) 1 /* 2 Copyright 2019 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package common 18 19 import ( 20 "context" 21 "fmt" 22 "net" 23 "sync" 24 "time" 25 26 corev1 "k8s.io/api/core/v1" 27 "k8s.io/apimachinery/pkg/api/equality" 28 v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 "k8s.io/apimachinery/pkg/runtime" 30 "k8s.io/apimachinery/pkg/watch" 31 clientset "k8s.io/client-go/kubernetes" 32 "k8s.io/client-go/tools/cache" 33 "k8s.io/klog/v2" 34 35 "k8s.io/apimachinery/pkg/util/wait" 36 "k8s.io/perf-tests/clusterloader2/pkg/execservice" 37 "k8s.io/perf-tests/clusterloader2/pkg/measurement" 38 measurementutil "k8s.io/perf-tests/clusterloader2/pkg/measurement/util" 39 "k8s.io/perf-tests/clusterloader2/pkg/measurement/util/checker" 40 "k8s.io/perf-tests/clusterloader2/pkg/measurement/util/informer" 41 "k8s.io/perf-tests/clusterloader2/pkg/measurement/util/workerqueue" 42 "k8s.io/perf-tests/clusterloader2/pkg/util" 43 ) 44 45 const ( 46 serviceCreationLatencyName = "ServiceCreationLatency" 47 serviceCreationLatencyWorkers = 10 48 defaultServiceCreationLatencyTimeout = 10 * time.Minute 49 defaultCheckInterval = 10 * time.Second 50 pingBackoff = 1 * time.Second 51 pingChecks = 3 52 53 creatingPhase = "creating" 54 ipAssigningPhase = "ipAssigning" 55 reachabilityPhase = "reachability" 56 deletingPhase = "deleting" 57 deletedPhase = "deleted" 58 ) 59 60 func init() { 61 if err := measurement.Register(serviceCreationLatencyName, createServiceCreationLatencyMeasurement); err != nil { 62 klog.Fatalf("cant register service %v", err) 63 } 64 } 65 66 func createServiceCreationLatencyMeasurement() measurement.Measurement { 67 return &serviceCreationLatencyMeasurement{ 68 selector: util.NewObjectSelector(), 69 queue: workerqueue.NewWorkerQueue(serviceCreationLatencyWorkers), 70 creationTimes: measurementutil.NewObjectTransitionTimes(serviceCreationLatencyName), 71 pingCheckers: checker.NewMap(), 72 } 73 } 74 75 type serviceCreationLatencyMeasurement struct { 76 selector *util.ObjectSelector 77 waitTimeout time.Duration 78 stopCh chan struct{} 79 isRunning bool 80 queue workerqueue.Interface 81 client clientset.Interface 82 creationTimes *measurementutil.ObjectTransitionTimes 83 pingCheckers checker.Map 84 lock sync.Mutex 85 } 86 87 // Execute executes service startup latency measurement actions. 88 // Services can be specified by field and/or label selectors. 89 // If namespace is not passed by parameter, all-namespace scope is assumed. 90 // "start" action starts observation of the services. 91 // "waitForReady" waits until all services are reachable. 92 // "waitForDeletion" waits until all services are deleted 93 // "gather" returns service created latency summary. 94 // This measurement only works for services with ClusterIP, NodePort and LoadBalancer type. 95 func (s *serviceCreationLatencyMeasurement) Execute(config *measurement.Config) ([]measurement.Summary, error) { 96 s.client = config.ClusterFramework.GetClientSets().GetClient() 97 action, err := util.GetString(config.Params, "action") 98 if err != nil { 99 return nil, err 100 } 101 if !config.ClusterLoaderConfig.ExecServiceConfig.Enable { 102 return nil, fmt.Errorf("enable-exec-service flag not enabled") 103 } 104 105 switch action { 106 case "start": 107 if err := s.selector.Parse(config.Params); err != nil { 108 return nil, err 109 } 110 s.waitTimeout, err = util.GetDurationOrDefault(config.Params, "waitTimeout", defaultServiceCreationLatencyTimeout) 111 if err != nil { 112 return nil, err 113 } 114 return nil, s.start() 115 case "waitForReady": 116 return nil, s.waitForReady() 117 case "waitForDeletion": 118 return nil, s.waitForDeletion() 119 case "gather": 120 return s.gather(config.Identifier) 121 default: 122 return nil, fmt.Errorf("unknown action %v", action) 123 } 124 } 125 126 // Dispose cleans up after the measurement. 127 func (s *serviceCreationLatencyMeasurement) Dispose() { 128 if s.isRunning { 129 s.isRunning = false 130 close(s.stopCh) 131 } 132 s.queue.Stop() 133 s.lock.Lock() 134 defer s.lock.Unlock() 135 s.pingCheckers.Dispose() 136 } 137 138 // String returns a string representation of the metric. 139 func (s *serviceCreationLatencyMeasurement) String() string { 140 return serviceCreationLatencyName + ": " + s.selector.String() 141 } 142 143 func (s *serviceCreationLatencyMeasurement) start() error { 144 if s.isRunning { 145 klog.V(2).Infof("%s: service creation latency measurement already running", s) 146 return nil 147 } 148 klog.V(2).Infof("%s: starting service creation latency measurement...", s) 149 150 s.isRunning = true 151 s.stopCh = make(chan struct{}) 152 153 i := informer.NewInformer( 154 &cache.ListWatch{ 155 ListFunc: func(options v1.ListOptions) (runtime.Object, error) { 156 s.selector.ApplySelectors(&options) 157 return s.client.CoreV1().Services(s.selector.Namespace).List(context.TODO(), options) 158 }, 159 WatchFunc: func(options v1.ListOptions) (watch.Interface, error) { 160 s.selector.ApplySelectors(&options) 161 return s.client.CoreV1().Services(s.selector.Namespace).Watch(context.TODO(), options) 162 }, 163 }, 164 func(oldObj, newObj interface{}) { 165 f := func() { 166 s.handleObject(oldObj, newObj) 167 } 168 s.queue.Add(&f) 169 }, 170 ) 171 return informer.StartAndSync(i, s.stopCh, informerSyncTimeout) 172 } 173 174 func (s *serviceCreationLatencyMeasurement) waitForReady() error { 175 return wait.Poll(defaultCheckInterval, s.waitTimeout, func() (bool, error) { 176 for _, svcType := range []corev1.ServiceType{corev1.ServiceTypeClusterIP, corev1.ServiceTypeNodePort, corev1.ServiceTypeLoadBalancer} { 177 reachable := s.creationTimes.Count(phaseName(reachabilityPhase, svcType)) 178 created := s.creationTimes.Count(phaseName(creatingPhase, svcType)) 179 klog.V(2).Infof("%s type %s: %d created, %d reachable", s, svcType, created, reachable) 180 if created != reachable { 181 return false, nil 182 } 183 } 184 return true, nil 185 }) 186 } 187 188 func (s *serviceCreationLatencyMeasurement) waitForDeletion() error { 189 return wait.Poll(defaultCheckInterval, s.waitTimeout, func() (bool, error) { 190 for _, svcType := range []corev1.ServiceType{corev1.ServiceTypeClusterIP, corev1.ServiceTypeNodePort, corev1.ServiceTypeLoadBalancer} { 191 deleted := s.creationTimes.Count(phaseName(deletedPhase, svcType)) 192 created := s.creationTimes.Count(phaseName(creatingPhase, svcType)) 193 klog.V(2).Infof("%s type %s: %d created, %d deleted", s, svcType, created, deleted) 194 if created != deleted { 195 return false, nil 196 } 197 } 198 return true, nil 199 }) 200 } 201 202 var serviceCreationTransitions = map[string]measurementutil.Transition{ 203 "create_to_available_clusterip": { 204 From: phaseName(creatingPhase, corev1.ServiceTypeClusterIP), 205 To: phaseName(reachabilityPhase, corev1.ServiceTypeClusterIP), 206 }, 207 "create_to_available_nodeport": { 208 From: phaseName(creatingPhase, corev1.ServiceTypeNodePort), 209 To: phaseName(reachabilityPhase, corev1.ServiceTypeNodePort), 210 }, 211 "create_to_assigned_loadbalancer": { 212 From: phaseName(creatingPhase, corev1.ServiceTypeLoadBalancer), 213 To: phaseName(ipAssigningPhase, corev1.ServiceTypeLoadBalancer), 214 }, 215 "assigned_to_available_loadbalancer": { 216 From: phaseName(ipAssigningPhase, corev1.ServiceTypeLoadBalancer), 217 To: phaseName(reachabilityPhase, corev1.ServiceTypeLoadBalancer), 218 }, 219 "create_to_available_loadbalancer": { 220 From: phaseName(creatingPhase, corev1.ServiceTypeLoadBalancer), 221 To: phaseName(reachabilityPhase, corev1.ServiceTypeLoadBalancer), 222 }, 223 "delete_loadbalancer": { 224 From: phaseName(deletingPhase, corev1.ServiceTypeLoadBalancer), 225 To: phaseName(deletedPhase, corev1.ServiceTypeLoadBalancer), 226 }, 227 } 228 229 func (s *serviceCreationLatencyMeasurement) gather(identifier string) ([]measurement.Summary, error) { 230 klog.V(2).Infof("%s: gathering service created latency measurement...", s) 231 if !s.isRunning { 232 return nil, fmt.Errorf("metric %s has not been started", s) 233 } 234 235 // NOTE: For ClusterIP or NodePort type of service, the cluster ip or node port is assigned as part of service creation API call, so the ipAssigning phase is no sense. 236 serviceCreationLatency := s.creationTimes.CalculateTransitionsLatency(serviceCreationTransitions, measurementutil.MatchAll) 237 238 content, err := util.PrettyPrintJSON(measurementutil.LatencyMapToPerfData(serviceCreationLatency)) 239 if err != nil { 240 return nil, err 241 } 242 summary := measurement.CreateSummary(fmt.Sprintf("%s_%s", serviceCreationLatencyName, identifier), "json", content) 243 return []measurement.Summary{summary}, nil 244 } 245 246 func (s *serviceCreationLatencyMeasurement) handleObject(oldObj, newObj interface{}) { 247 var oldService *corev1.Service 248 var newService *corev1.Service 249 var ok bool 250 oldService, ok = oldObj.(*corev1.Service) 251 if oldObj != nil && !ok { 252 klog.Errorf("%s: uncastable old object: %v", s, oldObj) 253 return 254 } 255 newService, ok = newObj.(*corev1.Service) 256 if newObj != nil && !ok { 257 klog.Errorf("%s: uncastable new object: %v", s, newObj) 258 return 259 } 260 if isEqual := oldService != nil && 261 newService != nil && 262 equality.Semantic.DeepEqual(oldService.Spec, newService.Spec) && 263 equality.Semantic.DeepEqual(oldService.Status, newService.Status); isEqual { 264 return 265 } 266 267 // TODO(#680): Make it thread-safe. 268 if !s.isRunning { 269 return 270 } 271 if newObj == nil { 272 if err := s.deleteObject(oldService); err != nil { 273 klog.Errorf("%s: delete checker error: %v", s, err) 274 } 275 return 276 } 277 if err := s.updateObject(newService); err != nil { 278 klog.Errorf("%s: create checker error: %v", s, err) 279 } 280 } 281 282 func (s *serviceCreationLatencyMeasurement) deleteObject(svc *corev1.Service) error { 283 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(svc) 284 if err != nil { 285 return fmt.Errorf("meta key created error: %v", err) 286 } 287 s.lock.Lock() 288 defer s.lock.Unlock() 289 s.creationTimes.Set(key, phaseName(deletingPhase, svc.Spec.Type), svc.ObjectMeta.DeletionTimestamp.Time) 290 s.creationTimes.Set(key, phaseName(deletedPhase, svc.Spec.Type), time.Now()) 291 s.pingCheckers.DeleteAndStop(key) 292 return nil 293 } 294 295 func (s *serviceCreationLatencyMeasurement) updateObject(svc *corev1.Service) error { 296 // This measurement only works for services with ClusterIP, NodePort and LoadBalancer type. 297 if svc.Spec.Type != corev1.ServiceTypeClusterIP && svc.Spec.Type != corev1.ServiceTypeNodePort && svc.Spec.Type != corev1.ServiceTypeLoadBalancer { 298 return nil 299 } 300 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(svc) 301 if err != nil { 302 return fmt.Errorf("meta key created error: %v", err) 303 } 304 if _, exists := s.creationTimes.Get(key, phaseName(creatingPhase, svc.Spec.Type)); !exists { 305 s.creationTimes.Set(key, phaseName(creatingPhase, svc.Spec.Type), svc.CreationTimestamp.Time) 306 } 307 if svc.Spec.Type == corev1.ServiceTypeLoadBalancer && len(svc.Status.LoadBalancer.Ingress) < 1 { 308 return nil 309 } 310 // NOTE: For ClusterIP or NodePort type of service, the cluster ip or node port is assigned as part of service creation API call, so the ipAssigning phase is no sense. 311 if svc.Spec.Type == corev1.ServiceTypeLoadBalancer { 312 if _, exists := s.creationTimes.Get(key, phaseName(ipAssigningPhase, svc.Spec.Type)); exists { 313 return nil 314 } 315 s.creationTimes.Set(key, phaseName(ipAssigningPhase, svc.Spec.Type), time.Now()) 316 } 317 pc := &pingChecker{ 318 callerName: s.String(), 319 svc: svc, 320 creationTimes: s.creationTimes, 321 stopCh: make(chan struct{}), 322 } 323 pc.run() 324 s.lock.Lock() 325 defer s.lock.Unlock() 326 s.pingCheckers.Add(key, pc) 327 328 return nil 329 } 330 331 func phaseName(phase string, serviceType corev1.ServiceType) string { 332 return fmt.Sprintf("%s_%s", phase, serviceType) 333 } 334 335 type pingChecker struct { 336 callerName string 337 svc *corev1.Service 338 creationTimes *measurementutil.ObjectTransitionTimes 339 stopCh chan struct{} 340 } 341 342 func (p *pingChecker) run() { 343 key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(p.svc) 344 if err != nil { 345 klog.Errorf("%s: meta key created error: %v", p.callerName, err) 346 return 347 } 348 success := 0 349 for { 350 select { 351 case <-p.stopCh: 352 return 353 default: 354 // TODO(#685): Make ping checks less communication heavy. 355 pod, err := execservice.GetPod() 356 if err != nil { 357 klog.Warningf("call to execservice.GetPod() ended with error: %v", err) 358 success = 0 359 time.Sleep(pingBackoff) 360 continue 361 } 362 var ips []string 363 var port int32 364 switch p.svc.Spec.Type { 365 case corev1.ServiceTypeClusterIP: 366 ips = p.svc.Spec.ClusterIPs 367 port = p.svc.Spec.Ports[0].Port 368 case corev1.ServiceTypeNodePort: 369 ips = []string{pod.Status.HostIP} 370 port = p.svc.Spec.Ports[0].NodePort 371 case corev1.ServiceTypeLoadBalancer: 372 for _, ingress := range p.svc.Status.LoadBalancer.Ingress { 373 ips = append(ips, ingress.IP) 374 } 375 port = p.svc.Spec.Ports[0].Port 376 } 377 for _, ip := range ips { 378 address := net.JoinHostPort(ip, fmt.Sprint(port)) 379 command := fmt.Sprintf("curl %s", address) 380 _, err = execservice.RunCommand(context.TODO(), pod, command) 381 if err != nil { 382 break 383 } 384 } 385 if err != nil { 386 success = 0 387 time.Sleep(pingBackoff) 388 continue 389 } 390 success++ 391 if success == pingChecks { 392 p.creationTimes.Set(key, phaseName(reachabilityPhase, p.svc.Spec.Type), time.Now()) 393 return 394 } 395 } 396 } 397 } 398 399 func (p *pingChecker) Stop() { 400 close(p.stopCh) 401 }