k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/loadbalancer_nodesync_latency.go (about) 1 /* 2 Copyright 2021 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package common 18 19 import ( 20 "context" 21 "encoding/json" 22 "fmt" 23 "time" 24 25 v1 "k8s.io/api/core/v1" 26 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 "k8s.io/apimachinery/pkg/fields" 28 "k8s.io/apimachinery/pkg/runtime" 29 "k8s.io/apimachinery/pkg/types" 30 "k8s.io/apimachinery/pkg/util/strategicpatch" 31 "k8s.io/apimachinery/pkg/util/wait" 32 "k8s.io/apimachinery/pkg/watch" 33 clientset "k8s.io/client-go/kubernetes" 34 "k8s.io/client-go/tools/cache" 35 "k8s.io/klog/v2" 36 "k8s.io/perf-tests/clusterloader2/pkg/measurement" 37 measurementutil "k8s.io/perf-tests/clusterloader2/pkg/measurement/util" 38 "k8s.io/perf-tests/clusterloader2/pkg/util" 39 ) 40 41 const ( 42 loadBalancerNodeSyncLatencyName = "LoadBalancerNodeSyncLatency" 43 defaultNodeSyncLatencyTimeout = 30 * time.Minute 44 45 // excludeFromLoadBalancersLabel is the node label to exclude a node from being a LB backend 46 excludeFromLoadBalancersLabel = "node.kubernetes.io/exclude-from-external-load-balancers" 47 // nodeSyncEventReason is the event reason emitted by service controller when it completes node sync on the lb. 48 nodeSyncEventReason = "UpdatedLoadBalancer" 49 50 phaseNodeSyncStart = "nodesync_triggered" 51 phaseNodeSyncComplete = "nodesync_complete" 52 ) 53 54 var nodeSyncTransition = map[string]measurementutil.Transition{ 55 "nodesync_start_to_complete": { 56 From: phaseNodeSyncStart, 57 To: phaseNodeSyncComplete, 58 }, 59 } 60 61 func init() { 62 if err := measurement.Register(loadBalancerNodeSyncLatencyName, createLoadBalancerNodeSyncMeasurement); err != nil { 63 klog.Fatalf("Cannot register %s: %v", loadBalancerNodeSyncLatencyName, err) 64 } 65 } 66 67 func createLoadBalancerNodeSyncMeasurement() measurement.Measurement { 68 return &LoadBalancerNodeSyncMeasurement{ 69 selector: util.NewObjectSelector(), 70 svcNodeSyncLatencyTracker: measurementutil.NewObjectTransitionTimes(loadBalancerNodeSyncLatencyName), 71 } 72 } 73 74 type LoadBalancerNodeSyncMeasurement struct { 75 client clientset.Interface 76 // selector used to select relevant load balancer type service used for measurement 77 selector *util.ObjectSelector 78 // waitTimeout specify for the timeout for node sync on all LBs to complete 79 waitTimeout time.Duration 80 // svcNodeSyncLatencyTracker tracks the nodesync latency 81 svcNodeSyncLatencyTracker *measurementutil.ObjectTransitionTimes 82 // excludedNodeName is the node name used to trigger LB nodesync 83 excludedNodeName string 84 // lbSvcMap is the map that contains load balancer type service with key (namespaced/name) and service 85 lbSvcMap map[string]v1.Service 86 } 87 88 // LoadBalancerNodeSyncMeasurement takes measurement of node sync latency for selected lb type services. 89 // This measurement only works for K8s 1.19 as it depends on the ExcludeNodeForLoadbalancer label. 90 // Services can be specified by field and/or label selectors. 91 // If namespace is not passed by parameter, all LoadBalancer type service with all-namespace scope is assumed. 92 // "measure" action triggers nodesync and observation of nodesync completion for selected LB services. 93 // "gather" returns node sync latency summary. 94 func (s *LoadBalancerNodeSyncMeasurement) Execute(config *measurement.Config) ([]measurement.Summary, error) { 95 s.client = config.ClusterFramework.GetClientSets().GetClient() 96 action, err := util.GetString(config.Params, "action") 97 if err != nil { 98 return nil, err 99 } 100 switch action { 101 case "measure": 102 if err := s.selector.Parse(config.Params); err != nil { 103 return nil, err 104 } 105 s.waitTimeout, err = util.GetDurationOrDefault(config.Params, "waitTimeout", defaultNodeSyncLatencyTimeout) 106 if err != nil { 107 return nil, err 108 } 109 return nil, s.measureNodeSyncLatency() 110 case "gather": 111 if err := s.labelNodeForLBs(false); err != nil { 112 return nil, err 113 } 114 return s.gather(config.Identifier) 115 default: 116 return nil, fmt.Errorf("unknown action %v", action) 117 } 118 } 119 120 func (s *LoadBalancerNodeSyncMeasurement) Dispose() {} 121 122 func (s *LoadBalancerNodeSyncMeasurement) String() string { 123 return loadBalancerNodeSyncLatencyName + ": " + s.selector.String() 124 } 125 126 func (s *LoadBalancerNodeSyncMeasurement) measureNodeSyncLatency() error { 127 ctx := context.Background() 128 options := metav1.ListOptions{} 129 s.selector.ApplySelectors(&options) 130 svcList, err := s.client.CoreV1().Services(s.selector.Namespace).List(ctx, options) 131 if err != nil { 132 return err 133 } 134 135 s.lbSvcMap = map[string]v1.Service{} 136 for _, svc := range svcList.Items { 137 if svc.Spec.Type == v1.ServiceTypeLoadBalancer { 138 s.lbSvcMap[keyFunc(svc.Namespace, svc.Name)] = svc 139 } 140 } 141 totalLbSvc := len(s.lbSvcMap) 142 143 // Use event informer to keep track of nodeSync events. 144 stopCh := make(chan struct{}) 145 defer close(stopCh) 146 147 eventInformer := s.getEventInformer() 148 go eventInformer.Run(stopCh) 149 150 // trigger node sync by picking a node and add exclude lb label 151 nodeList, err := s.client.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) 152 if err != nil { 153 return err 154 } 155 156 for _, node := range nodeList.Items { 157 if isCandidateNode(node) { 158 s.excludedNodeName = node.Name 159 break 160 } 161 } 162 163 if s.excludedNodeName == "" { 164 return fmt.Errorf("failed to find a node candidate to trigger nodesync from node list: %v", nodeList.Items) 165 } 166 167 defer func() { 168 if err = s.labelNodeForLBs(false); err != nil { 169 klog.Errorf("Failed to label node %v: %v", s.excludedNodeName, err) 170 } 171 172 }() 173 if err = s.labelNodeForLBs(true); err != nil { 174 return err 175 } 176 177 now := time.Now() 178 for key := range s.lbSvcMap { 179 s.svcNodeSyncLatencyTracker.Set(key, phaseNodeSyncStart, now) 180 } 181 182 return wait.Poll(5*time.Second, s.waitTimeout, func() (done bool, err error) { 183 if s.svcNodeSyncLatencyTracker.Count(phaseNodeSyncComplete) == totalLbSvc { 184 return true, nil 185 } 186 klog.V(2).Infof("out of a total of %v LBs, %v LB type service has %q event", totalLbSvc, s.svcNodeSyncLatencyTracker.Count(phaseNodeSyncComplete), nodeSyncEventReason) 187 return false, nil 188 }) 189 } 190 191 func (s *LoadBalancerNodeSyncMeasurement) getEventInformer() cache.Controller { 192 ctx := context.Background() 193 listFunc := func(options metav1.ListOptions) (runtime.Object, error) { 194 o := metav1.ListOptions{ 195 Limit: 1, 196 } 197 result, err := s.client.CoreV1().Events(metav1.NamespaceAll).List(ctx, o) 198 if err != nil { 199 return nil, err 200 } 201 result.Continue = "" 202 result.Items = nil 203 return result, nil 204 } 205 206 watchFunc := func(options metav1.ListOptions) (watch.Interface, error) { 207 options.FieldSelector = fields.Set{"reason": nodeSyncEventReason}.AsSelector().String() 208 return s.client.CoreV1().Events(metav1.NamespaceAll).Watch(ctx, options) 209 } 210 211 _, eventInformer := cache.NewInformer(&cache.ListWatch{ListFunc: listFunc, WatchFunc: watchFunc}, nil, 0, 212 cache.ResourceEventHandlerFuncs{ 213 AddFunc: func(obj interface{}) { 214 s.processEvent(obj.(*v1.Event)) 215 }, 216 }) 217 return eventInformer 218 } 219 220 func (s *LoadBalancerNodeSyncMeasurement) processEvent(event *v1.Event) { 221 if event.Reason != nodeSyncEventReason { 222 return 223 } 224 225 key := keyFunc(event.InvolvedObject.Namespace, event.InvolvedObject.Name) 226 _, ok := s.lbSvcMap[key] 227 if ok { 228 _, found := s.svcNodeSyncLatencyTracker.Get(key, phaseNodeSyncComplete) 229 if !found { 230 s.svcNodeSyncLatencyTracker.Set(key, phaseNodeSyncComplete, event.CreationTimestamp.Time) 231 } 232 } 233 } 234 235 // labelNodeForLBs manipulates candidate node to include or exclude it from being LB backends. 236 func (s *LoadBalancerNodeSyncMeasurement) labelNodeForLBs(exclude bool) error { 237 ctx := context.Background() 238 node, err := s.client.CoreV1().Nodes().Get(ctx, s.excludedNodeName, metav1.GetOptions{}) 239 if err != nil { 240 return err 241 } 242 newNode := node.DeepCopy() 243 244 if exclude { 245 newNode.Labels[excludeFromLoadBalancersLabel] = "true" 246 } else { 247 delete(newNode.Labels, excludeFromLoadBalancersLabel) 248 } 249 250 patchBytes, err := preparePatchBytes(node, newNode, v1.Node{}) 251 if err != nil { 252 return err 253 } 254 255 _, err = s.client.CoreV1().Nodes().Patch(ctx, s.excludedNodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}) 256 if err != nil { 257 return err 258 } 259 return nil 260 } 261 262 func (s *LoadBalancerNodeSyncMeasurement) gather(identifier string) ([]measurement.Summary, error) { 263 klog.V(2).Infof("%s: gathering nodesync latency measurement...", s) 264 nodeSyncLatency := s.svcNodeSyncLatencyTracker.CalculateTransitionsLatency(nodeSyncTransition, measurementutil.MatchAll) 265 content, err := util.PrettyPrintJSON(measurementutil.LatencyMapToPerfData(nodeSyncLatency)) 266 if err != nil { 267 return nil, err 268 } 269 270 summary := measurement.CreateSummary(fmt.Sprintf("%s_%s", loadBalancerNodeSyncLatencyName, identifier), "json", content) 271 272 // TODO: return an error here if latency is higher than an upper bound. 273 return []measurement.Summary{summary}, nil 274 } 275 276 // isCandidateNode returns if node can be used to trigger nodesync 277 func isCandidateNode(node v1.Node) bool { 278 if _, hasExcludeBalancerLabel := node.Labels[excludeFromLoadBalancersLabel]; hasExcludeBalancerLabel { 279 return false 280 } 281 // If we have no info, don't accept 282 if len(node.Status.Conditions) == 0 { 283 return false 284 } 285 for _, cond := range node.Status.Conditions { 286 // We consider the node for load balancing only when its NodeReady condition status 287 // is ConditionTrue 288 if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue { 289 klog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status) 290 return false 291 } 292 } 293 return true 294 } 295 296 func preparePatchBytes(old, new, refStruct interface{}) ([]byte, error) { 297 oldBytes, err := json.Marshal(old) 298 if err != nil { 299 return nil, fmt.Errorf("failed to marshal old object: %v", err) 300 } 301 302 newBytes, err := json.Marshal(new) 303 if err != nil { 304 return nil, fmt.Errorf("failed to marshal new object: %v", err) 305 } 306 307 patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldBytes, newBytes, refStruct) 308 if err != nil { 309 return nil, fmt.Errorf("failed to create patch: %v", err) 310 } 311 return patchBytes, nil 312 } 313 314 func keyFunc(namespace, name string) string { 315 return fmt.Sprintf("%s/%s", namespace, name) 316 }