k8s.io/kubernetes@v1.29.3/test/e2e/network/networking_perf.go

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package network

// Tests network performance using iperf or other containers.
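//
// These tests carry the feature.NetworkingPerformance label, so default CI runs
// typically filter them out. A sketch of a manual invocation (assumed; exact flags
// vary by environment):
//
//	e2e.test --ginkgo.focus="Networking IPerf2" --kubeconfig=$HOME/.kube/config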
import (
	"context"
	"fmt"
	"time"

	"github.com/onsi/ginkgo/v2"
	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	discoveryv1 "k8s.io/api/discovery/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2edaemonset "k8s.io/kubernetes/test/e2e/framework/daemonset"
	e2edeployment "k8s.io/kubernetes/test/e2e/framework/deployment"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	"k8s.io/kubernetes/test/e2e/network/common"
	imageutils "k8s.io/kubernetes/test/utils/image"
	admissionapi "k8s.io/pod-security-admission/api"
)

const (
	// use this timeout for larger clusters
	largeClusterTimeout = 400 * time.Second
	// iperf2BaselineBandwidthMegabytesPerSecond sets a baseline for iperf2 bandwidth of 10 MBps = 80 Mbps.
	// This limit is chosen to support small devices with 100 Mbps network cards.
	iperf2BaselineBandwidthMegabytesPerSecond = 10
	// iperf2Port selects an arbitrary, unique port to run iperf2's client and server on
	iperf2Port = 6789
	// labelKey is used as a key for selectors
	labelKey = "app"
	// clientLabelValue is used as a value for iperf2 client selectors
	clientLabelValue = "iperf2-client"
	// serverLabelValue is used as a value for iperf2 server selectors
	serverLabelValue = "iperf2-server"
	// serverServiceName defines the service name used for the iperf2 server
	serverServiceName = "iperf2-server"
)

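// iperf2ServerDeployment creates a single-replica Deployment running "iperf -s" on
// iperf2Port, waits for the rollout to complete, and returns the Deployment. When
// isIPV6 is true, the server is started with -V so that it uses IPv6.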
func iperf2ServerDeployment(ctx context.Context, client clientset.Interface, namespace string, isIPV6 bool) (*appsv1.Deployment, error) {
	framework.Logf("deploying iperf2 server")
	one := int64(1)
	replicas := int32(1)
	labels := map[string]string{labelKey: serverLabelValue}
	args := []string{
		"-s",
		"-p",
		fmt.Sprintf("%d", iperf2Port),
	}
	if isIPV6 {
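		// iperf2's -V flag selects the IPv6 domain.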
		args = append(args, "-V")
	}
	deploymentSpec := e2edeployment.NewDeployment(
		"iperf2-server-deployment", replicas, labels, "iperf2-server",
		imageutils.GetE2EImage(imageutils.Agnhost), appsv1.RollingUpdateDeploymentStrategyType)
	deploymentSpec.Spec.Template.Spec.TerminationGracePeriodSeconds = &one
	deploymentSpec.Spec.Template.Spec.Containers[0].Command = []string{"iperf"}
	deploymentSpec.Spec.Template.Spec.Containers[0].Args = args
	deploymentSpec.Spec.Template.Spec.Containers[0].Ports = []v1.ContainerPort{
		{
			ContainerPort: iperf2Port,
			Protocol:      v1.ProtocolTCP,
		},
	}

	deployment, err := client.AppsV1().Deployments(namespace).Create(ctx, deploymentSpec, metav1.CreateOptions{})
	if err != nil {
		return nil, fmt.Errorf("deployment %q Create API error: %w", deploymentSpec.Name, err)
	}
	framework.Logf("Waiting for deployment %q to complete", deploymentSpec.Name)
	err = e2edeployment.WaitForDeploymentComplete(client, deployment)
	if err != nil {
		return nil, fmt.Errorf("deployment %q failed to complete: %w", deploymentSpec.Name, err)
	}

	return deployment, nil
}

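// iperf2ServerService exposes the iperf2 server through a ClusterIP Service named
// serverServiceName on iperf2Port/TCP, giving clients a stable DNS name to connect to.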
func iperf2ServerService(ctx context.Context, client clientset.Interface, namespace string) (*v1.Service, error) {
	service := &v1.Service{
		ObjectMeta: metav1.ObjectMeta{Name: serverServiceName},
		Spec: v1.ServiceSpec{
			Selector: map[string]string{
				labelKey: serverLabelValue,
			},
			Ports: []v1.ServicePort{
				{Protocol: v1.ProtocolTCP, Port: iperf2Port},
			},
		},
	}
	return client.CoreV1().Services(namespace).Create(ctx, service, metav1.CreateOptions{})
}

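// iperf2ClientDaemonSet creates a DaemonSet of agnhost pods, one per schedulable node.
// The pods start out idle; the test later execs an iperf client run in each of them.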
func iperf2ClientDaemonSet(ctx context.Context, client clientset.Interface, namespace string) (*appsv1.DaemonSet, error) {
	one := int64(1)
	labels := map[string]string{labelKey: clientLabelValue}
	spec := e2edaemonset.NewDaemonSet("iperf2-clients", imageutils.GetE2EImage(imageutils.Agnhost), labels, nil, nil, nil)
	spec.Spec.Template.Spec.TerminationGracePeriodSeconds = &one

	ds, err := client.AppsV1().DaemonSets(namespace).Create(ctx, spec, metav1.CreateOptions{})
	if err != nil {
		return nil, fmt.Errorf("daemonset %s Create API error: %w", spec.Name, err)
	}
	return ds, nil
}

// Test summary:
//
//	This test uses iperf2 to obtain bandwidth data between nodes in the cluster, providing a coarse measure
//	of the health of the cluster network.  The test runs two sets of pods:
//	  1. an iperf2 server on a single node
//	  2. a daemonset of iperf2 clients
//	The test then iterates through the clients, one by one, running iperf2 from each of them to transfer
//	data to the server and back for ten seconds, after which the results are collected and parsed.
//	Thus, if your cluster has 10 nodes, then 10 test runs are performed.
//	  Note: a more complete test could run this scenario with a daemonset of servers as well; however, this
//	  would require n^2 tests, n^2 time, and n^2 network resources, which quickly become prohibitively large
//	  as the cluster size increases.
//	Finally, after collecting all data, the results are analyzed and tabulated.
var _ = common.SIGDescribe("Networking IPerf2", feature.NetworkingPerformance, func() {
	// this test runs iperf2: one pod as a server, and a daemonset of clients
	f := framework.NewDefaultFramework("network-perf")
	f.NamespacePodSecurityLevel = admissionapi.LevelBaseline

	ginkgo.It("should run iperf2", func(ctx context.Context) {
		readySchedulableNodes, err := e2enode.GetReadySchedulableNodes(ctx, f.ClientSet)
		framework.ExpectNoError(err)

		familyStr := ""
		if framework.TestContext.ClusterIsIPv6() {
			familyStr = "-V "
		}

		serverPodsListOptions := metav1.ListOptions{
			LabelSelector: fmt.Sprintf("%s=%s", labelKey, serverLabelValue),
		}

		// Step 1: set up iperf2 server -- a single pod on any node
		_, err = iperf2ServerDeployment(ctx, f.ClientSet, f.Namespace.Name, framework.TestContext.ClusterIsIPv6())
		framework.ExpectNoError(err, "deploy iperf2 server deployment")

		_, err = iperf2ServerService(ctx, f.ClientSet, f.Namespace.Name)
		framework.ExpectNoError(err, "deploy iperf2 server service")

		// Step 2: set up iperf2 client daemonset
		//   initially, the clients don't do anything -- they simply pause until they're called
		_, err = iperf2ClientDaemonSet(ctx, f.ClientSet, f.Namespace.Name)
		framework.ExpectNoError(err, "deploy iperf2 client daemonset")

		// Make sure the server is ready to go
		framework.Logf("waiting for iperf2 server endpoints")
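		// Note: this poll only waits for an EndpointSlice object to exist for the Service;
		// pod readiness was already established by WaitForDeploymentComplete above.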
		err = wait.Poll(2*time.Second, largeClusterTimeout, func() (done bool, err error) {
			listOptions := metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", discoveryv1.LabelServiceName, serverServiceName)}
			esList, err := f.ClientSet.DiscoveryV1().EndpointSlices(f.Namespace.Name).List(ctx, listOptions)
			framework.ExpectNoError(err, "Error fetching EndpointSlice for Service %s/%s", f.Namespace.Name, serverServiceName)

			if len(esList.Items) == 0 {
				framework.Logf("EndpointSlice for Service %s/%s not found", f.Namespace.Name, serverServiceName)
				return false, nil
			}
			return true, nil
		})
		framework.ExpectNoError(err, "unable to wait for endpoints for the iperf service")
		framework.Logf("found iperf2 server endpoints")

		clientPodsListOptions := metav1.ListOptions{
			LabelSelector: fmt.Sprintf("%s=%s", labelKey, clientLabelValue),
		}

		framework.Logf("waiting for client pods to be running")
		var clientPodList *v1.PodList
		err = wait.Poll(2*time.Second, largeClusterTimeout, func() (done bool, err error) {
			clientPodList, err = f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(ctx, clientPodsListOptions)
			if err != nil {
				return false, err
			}
			if len(clientPodList.Items) < len(readySchedulableNodes.Items) {
				return false, nil
			}
			for _, pod := range clientPodList.Items {
				if pod.Status.Phase != v1.PodRunning {
					return false, nil
				}
			}
			return true, nil
		})
		framework.ExpectNoError(err, "unable to wait for client pods to come up")
		framework.Logf("all client pods are ready: %d pods", len(clientPodList.Items))

		// Get a reference to the server pod for later
		serverPodList, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(ctx, serverPodsListOptions)
		framework.ExpectNoError(err)
		if len(serverPodList.Items) != 1 {
			framework.Failf("expected 1 server pod, found %d", len(serverPodList.Items))
		}
		serverPod := serverPodList.Items[0]
		framework.Logf("server pod phase %s", serverPod.Status.Phase)
		for i, condition := range serverPod.Status.Conditions {
			framework.Logf("server pod condition %d: %+v", i, condition)
		}
		for i, cont := range serverPod.Status.ContainerStatuses {
			framework.Logf("server pod container status %d: %+v", i, cont)
		}

		framework.Logf("found %d matching client pods", len(clientPodList.Items))

		nodeResults := &IPerf2NodeToNodeCSVResults{
			ServerNode: serverPod.Spec.NodeName,
			Results:    map[string]*IPerf2EnhancedCSVResults{},
		}

		// Step 3: iterate through the client pods one by one, running iperf2 in client mode to transfer
		//   data to the server and back and measure bandwidth
		for _, pod := range clientPodList.Items {
			podName := pod.Name
			nodeName := pod.Spec.NodeName

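			// Log the iperf version; "|| true" keeps a nonzero exit from "iperf -v"
			// from failing the exec.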
			iperfVersion := e2epod.ExecShellInPod(ctx, f, podName, "iperf -v || true")
			framework.Logf("iperf version: %s", iperfVersion)

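			// Each client gets up to three attempts; only a parse failure on the
			// final attempt fails the test.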
			for try := 0; ; try++ {
				/* iperf2 command parameters:
				 *  -e: use enhanced reporting giving more tcp/udp and traffic information
				 *  -p %d: server port to connect to
				 *  --reportstyle C: report as Comma-Separated Values
				 *  -i 1: seconds between periodic bandwidth reports
				 *  -c %s: run in client mode, connecting to <host>
				 */
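				// Illustrative only (an assumption about iperf2's CSV layout): with
				// --reportstyle C, iperf2 emits lines roughly like
				//   20231222075241,10.244.1.5,46238,10.96.45.12,6789,3,0.0-10.0,295698432,236558745
				// (timestamp, local addr/port, remote addr/port, id, interval, bytes, bits/sec),
				// which ParseIPerf2EnhancedResultsFromCSV consumes. The trailing "sleep 5"
				// presumably lets the connection drain before the pod is exec'd again.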
				command := fmt.Sprintf(`iperf %s -e -p %d --reportstyle C -i 1 -c %s && sleep 5`, familyStr, iperf2Port, serverServiceName)
				framework.Logf("attempting to run command '%s' in client pod %s (node %s)", command, podName, nodeName)
				output := e2epod.ExecShellInPod(ctx, f, podName, command)
				framework.Logf("output from exec on client pod %s (node %s): \n%s\n", podName, nodeName, output)

				results, err := ParseIPerf2EnhancedResultsFromCSV(output)
				if err == nil {
					nodeResults.Results[nodeName] = results
					break
				} else if try == 2 {
					framework.ExpectNoError(err, "unable to parse iperf2 output from client pod %s (node %s)", pod.Name, nodeName)
				} else {
					framework.Logf("Retrying: IPerf run failed: %+v", err)
				}
			}
		}

		// Step 4: after collecting all the client<->server data, compile and present the results
		/*
			Example output:

			Dec 22 07:52:41.102: INFO:                                From                                 To    Bandwidth (MB/s)
			Dec 22 07:52:41.102: INFO:              three-node-ipv6-worker            three-node-ipv6-worker2                2381
			Dec 22 07:52:41.102: INFO:             three-node-ipv6-worker2            three-node-ipv6-worker2                2214
			Dec 22 07:52:41.102: INFO:             three-node-ipv6-worker3            three-node-ipv6-worker2                3123
		*/
		framework.Logf("%35s%35s%20s", "From", "To", "Bandwidth (MB/s)")
		for nodeFrom, results := range nodeResults.Results {
			framework.Logf("%35s%35s%20d", nodeFrom, nodeResults.ServerNode, results.Total.bandwidthMB())
		}
		for clientNode, results := range nodeResults.Results {
			megabytesPerSecond := results.Total.bandwidthMB()
			if megabytesPerSecond < iperf2BaselineBandwidthMegabytesPerSecond {
				framework.Failf("iperf2 MB/s received below baseline of %d for client %s to server %s: %d", iperf2BaselineBandwidthMegabytesPerSecond, clientNode, nodeResults.ServerNode, megabytesPerSecond)
			}
		}
	})
})