k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/loadbalancer_nodesync_latency.go (about)

     1  /*
     2  Copyright 2021 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package common
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"fmt"
    23  	"time"
    24  
    25  	v1 "k8s.io/api/core/v1"
    26  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    27  	"k8s.io/apimachinery/pkg/fields"
    28  	"k8s.io/apimachinery/pkg/runtime"
    29  	"k8s.io/apimachinery/pkg/types"
    30  	"k8s.io/apimachinery/pkg/util/strategicpatch"
    31  	"k8s.io/apimachinery/pkg/util/wait"
    32  	"k8s.io/apimachinery/pkg/watch"
    33  	clientset "k8s.io/client-go/kubernetes"
    34  	"k8s.io/client-go/tools/cache"
    35  	"k8s.io/klog/v2"
    36  	"k8s.io/perf-tests/clusterloader2/pkg/measurement"
    37  	measurementutil "k8s.io/perf-tests/clusterloader2/pkg/measurement/util"
    38  	"k8s.io/perf-tests/clusterloader2/pkg/util"
    39  )
    40  
const (
	// loadBalancerNodeSyncLatencyName is the name the measurement is registered under.
	// It is also used as the latency tracker name and as the prefix of the summary name.
	loadBalancerNodeSyncLatencyName = "LoadBalancerNodeSyncLatency"
	// defaultNodeSyncLatencyTimeout is the wait timeout used when the "waitTimeout" param is not set.
	defaultNodeSyncLatencyTimeout   = 30 * time.Minute

	// excludeFromLoadBalancersLabel is the node label that excludes a node from being an LB backend.
	excludeFromLoadBalancersLabel = "node.kubernetes.io/exclude-from-external-load-balancers"
	// nodeSyncEventReason is the event reason emitted by the service controller when it completes node sync on the LB.
	nodeSyncEventReason = "UpdatedLoadBalancer"

	// phaseNodeSyncStart is the phase recorded for every tracked LB service once the exclusion label has been applied.
	phaseNodeSyncStart    = "nodesync_triggered"
	// phaseNodeSyncComplete is the phase recorded when an event with nodeSyncEventReason is observed for a tracked service.
	phaseNodeSyncComplete = "nodesync_complete"
)
    53  
// nodeSyncTransition lists the phase transitions whose latency is computed in gather:
// a single transition from the node sync trigger to its completion.
var nodeSyncTransition = map[string]measurementutil.Transition{
	"nodesync_start_to_complete": {
		From: phaseNodeSyncStart,
		To:   phaseNodeSyncComplete,
	},
}
    60  
    61  func init() {
    62  	if err := measurement.Register(loadBalancerNodeSyncLatencyName, createLoadBalancerNodeSyncMeasurement); err != nil {
    63  		klog.Fatalf("Cannot register %s: %v", loadBalancerNodeSyncLatencyName, err)
    64  	}
    65  }
    66  
    67  func createLoadBalancerNodeSyncMeasurement() measurement.Measurement {
    68  	return &LoadBalancerNodeSyncMeasurement{
    69  		selector:                  util.NewObjectSelector(),
    70  		svcNodeSyncLatencyTracker: measurementutil.NewObjectTransitionTimes(loadBalancerNodeSyncLatencyName),
    71  	}
    72  }
    73  
// LoadBalancerNodeSyncMeasurement measures node sync latency for the selected
// LoadBalancer type services. It holds the state shared between the "measure"
// and "gather" actions of Execute.
type LoadBalancerNodeSyncMeasurement struct {
	// client is the Kubernetes client; it is (re)assigned on every Execute call.
	client clientset.Interface
	// selector selects the load balancer type services used for the measurement.
	selector *util.ObjectSelector
	// waitTimeout is the timeout for node sync to complete on all selected LBs.
	waitTimeout time.Duration
	// svcNodeSyncLatencyTracker tracks the node sync phase timestamps per service.
	svcNodeSyncLatencyTracker *measurementutil.ObjectTransitionTimes
	// excludedNodeName is the name of the node that is labeled to trigger LB node sync.
	excludedNodeName string
	// lbSvcMap maps a service key (namespace/name) to the load balancer type service being tracked.
	lbSvcMap map[string]v1.Service
}
    87  
    88  // LoadBalancerNodeSyncMeasurement takes measurement of node sync latency for selected lb type services.
    89  // This measurement only works for K8s 1.19 as it depends on the ExcludeNodeForLoadbalancer label.
    90  // Services can be specified by field and/or label selectors.
    91  // If namespace is not passed by parameter, all LoadBalancer type service with all-namespace scope is assumed.
    92  // "measure" action triggers nodesync and observation of nodesync completion for selected LB services.
    93  // "gather" returns node sync latency summary.
    94  func (s *LoadBalancerNodeSyncMeasurement) Execute(config *measurement.Config) ([]measurement.Summary, error) {
    95  	s.client = config.ClusterFramework.GetClientSets().GetClient()
    96  	action, err := util.GetString(config.Params, "action")
    97  	if err != nil {
    98  		return nil, err
    99  	}
   100  	switch action {
   101  	case "measure":
   102  		if err := s.selector.Parse(config.Params); err != nil {
   103  			return nil, err
   104  		}
   105  		s.waitTimeout, err = util.GetDurationOrDefault(config.Params, "waitTimeout", defaultNodeSyncLatencyTimeout)
   106  		if err != nil {
   107  			return nil, err
   108  		}
   109  		return nil, s.measureNodeSyncLatency()
   110  	case "gather":
   111  		if err := s.labelNodeForLBs(false); err != nil {
   112  			return nil, err
   113  		}
   114  		return s.gather(config.Identifier)
   115  	default:
   116  		return nil, fmt.Errorf("unknown action %v", action)
   117  	}
   118  }
   119  
   120  func (s *LoadBalancerNodeSyncMeasurement) Dispose() {}
   121  
   122  func (s *LoadBalancerNodeSyncMeasurement) String() string {
   123  	return loadBalancerNodeSyncLatencyName + ": " + s.selector.String()
   124  }
   125  
   126  func (s *LoadBalancerNodeSyncMeasurement) measureNodeSyncLatency() error {
   127  	ctx := context.Background()
   128  	options := metav1.ListOptions{}
   129  	s.selector.ApplySelectors(&options)
   130  	svcList, err := s.client.CoreV1().Services(s.selector.Namespace).List(ctx, options)
   131  	if err != nil {
   132  		return err
   133  	}
   134  
   135  	s.lbSvcMap = map[string]v1.Service{}
   136  	for _, svc := range svcList.Items {
   137  		if svc.Spec.Type == v1.ServiceTypeLoadBalancer {
   138  			s.lbSvcMap[keyFunc(svc.Namespace, svc.Name)] = svc
   139  		}
   140  	}
   141  	totalLbSvc := len(s.lbSvcMap)
   142  
   143  	// Use event informer to keep track of nodeSync events.
   144  	stopCh := make(chan struct{})
   145  	defer close(stopCh)
   146  
   147  	eventInformer := s.getEventInformer()
   148  	go eventInformer.Run(stopCh)
   149  
   150  	// trigger node sync by picking a node and add exclude lb label
   151  	nodeList, err := s.client.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
   152  	if err != nil {
   153  		return err
   154  	}
   155  
   156  	for _, node := range nodeList.Items {
   157  		if isCandidateNode(node) {
   158  			s.excludedNodeName = node.Name
   159  			break
   160  		}
   161  	}
   162  
   163  	if s.excludedNodeName == "" {
   164  		return fmt.Errorf("failed to find a node candidate to trigger nodesync from node list: %v", nodeList.Items)
   165  	}
   166  
   167  	defer func() {
   168  		if err = s.labelNodeForLBs(false); err != nil {
   169  			klog.Errorf("Failed to label node %v: %v", s.excludedNodeName, err)
   170  		}
   171  
   172  	}()
   173  	if err = s.labelNodeForLBs(true); err != nil {
   174  		return err
   175  	}
   176  
   177  	now := time.Now()
   178  	for key := range s.lbSvcMap {
   179  		s.svcNodeSyncLatencyTracker.Set(key, phaseNodeSyncStart, now)
   180  	}
   181  
   182  	return wait.Poll(5*time.Second, s.waitTimeout, func() (done bool, err error) {
   183  		if s.svcNodeSyncLatencyTracker.Count(phaseNodeSyncComplete) == totalLbSvc {
   184  			return true, nil
   185  		}
   186  		klog.V(2).Infof("out of a total of %v LBs, %v LB type service has %q event", totalLbSvc, s.svcNodeSyncLatencyTracker.Count(phaseNodeSyncComplete), nodeSyncEventReason)
   187  		return false, nil
   188  	})
   189  }
   190  
   191  func (s *LoadBalancerNodeSyncMeasurement) getEventInformer() cache.Controller {
   192  	ctx := context.Background()
   193  	listFunc := func(options metav1.ListOptions) (runtime.Object, error) {
   194  		o := metav1.ListOptions{
   195  			Limit: 1,
   196  		}
   197  		result, err := s.client.CoreV1().Events(metav1.NamespaceAll).List(ctx, o)
   198  		if err != nil {
   199  			return nil, err
   200  		}
   201  		result.Continue = ""
   202  		result.Items = nil
   203  		return result, nil
   204  	}
   205  
   206  	watchFunc := func(options metav1.ListOptions) (watch.Interface, error) {
   207  		options.FieldSelector = fields.Set{"reason": nodeSyncEventReason}.AsSelector().String()
   208  		return s.client.CoreV1().Events(metav1.NamespaceAll).Watch(ctx, options)
   209  	}
   210  
   211  	_, eventInformer := cache.NewInformer(&cache.ListWatch{ListFunc: listFunc, WatchFunc: watchFunc}, nil, 0,
   212  		cache.ResourceEventHandlerFuncs{
   213  			AddFunc: func(obj interface{}) {
   214  				s.processEvent(obj.(*v1.Event))
   215  			},
   216  		})
   217  	return eventInformer
   218  }
   219  
   220  func (s *LoadBalancerNodeSyncMeasurement) processEvent(event *v1.Event) {
   221  	if event.Reason != nodeSyncEventReason {
   222  		return
   223  	}
   224  
   225  	key := keyFunc(event.InvolvedObject.Namespace, event.InvolvedObject.Name)
   226  	_, ok := s.lbSvcMap[key]
   227  	if ok {
   228  		_, found := s.svcNodeSyncLatencyTracker.Get(key, phaseNodeSyncComplete)
   229  		if !found {
   230  			s.svcNodeSyncLatencyTracker.Set(key, phaseNodeSyncComplete, event.CreationTimestamp.Time)
   231  		}
   232  	}
   233  }
   234  
   235  // labelNodeForLBs manipulates candidate node to include or exclude it from being LB backends.
   236  func (s *LoadBalancerNodeSyncMeasurement) labelNodeForLBs(exclude bool) error {
   237  	ctx := context.Background()
   238  	node, err := s.client.CoreV1().Nodes().Get(ctx, s.excludedNodeName, metav1.GetOptions{})
   239  	if err != nil {
   240  		return err
   241  	}
   242  	newNode := node.DeepCopy()
   243  
   244  	if exclude {
   245  		newNode.Labels[excludeFromLoadBalancersLabel] = "true"
   246  	} else {
   247  		delete(newNode.Labels, excludeFromLoadBalancersLabel)
   248  	}
   249  
   250  	patchBytes, err := preparePatchBytes(node, newNode, v1.Node{})
   251  	if err != nil {
   252  		return err
   253  	}
   254  
   255  	_, err = s.client.CoreV1().Nodes().Patch(ctx, s.excludedNodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{})
   256  	if err != nil {
   257  		return err
   258  	}
   259  	return nil
   260  }
   261  
   262  func (s *LoadBalancerNodeSyncMeasurement) gather(identifier string) ([]measurement.Summary, error) {
   263  	klog.V(2).Infof("%s: gathering nodesync latency measurement...", s)
   264  	nodeSyncLatency := s.svcNodeSyncLatencyTracker.CalculateTransitionsLatency(nodeSyncTransition, measurementutil.MatchAll)
   265  	content, err := util.PrettyPrintJSON(measurementutil.LatencyMapToPerfData(nodeSyncLatency))
   266  	if err != nil {
   267  		return nil, err
   268  	}
   269  
   270  	summary := measurement.CreateSummary(fmt.Sprintf("%s_%s", loadBalancerNodeSyncLatencyName, identifier), "json", content)
   271  
   272  	// TODO: return an error here if latency is higher than an upper bound.
   273  	return []measurement.Summary{summary}, nil
   274  }
   275  
   276  // isCandidateNode returns if node can be used to trigger nodesync
   277  func isCandidateNode(node v1.Node) bool {
   278  	if _, hasExcludeBalancerLabel := node.Labels[excludeFromLoadBalancersLabel]; hasExcludeBalancerLabel {
   279  		return false
   280  	}
   281  	// If we have no info, don't accept
   282  	if len(node.Status.Conditions) == 0 {
   283  		return false
   284  	}
   285  	for _, cond := range node.Status.Conditions {
   286  		// We consider the node for load balancing only when its NodeReady condition status
   287  		// is ConditionTrue
   288  		if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue {
   289  			klog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
   290  			return false
   291  		}
   292  	}
   293  	return true
   294  }
   295  
   296  func preparePatchBytes(old, new, refStruct interface{}) ([]byte, error) {
   297  	oldBytes, err := json.Marshal(old)
   298  	if err != nil {
   299  		return nil, fmt.Errorf("failed to marshal old object: %v", err)
   300  	}
   301  
   302  	newBytes, err := json.Marshal(new)
   303  	if err != nil {
   304  		return nil, fmt.Errorf("failed to marshal new object: %v", err)
   305  	}
   306  
   307  	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldBytes, newBytes, refStruct)
   308  	if err != nil {
   309  		return nil, fmt.Errorf("failed to create patch: %v", err)
   310  	}
   311  	return patchBytes, nil
   312  }
   313  
   314  func keyFunc(namespace, name string) string {
   315  	return fmt.Sprintf("%s/%s", namespace, name)
   316  }