k8s.io/kubernetes@v1.29.3/test/e2e/network/loadbalancer.go (about) 1 /* 2 Copyright 2016 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package network 18 19 import ( 20 "context" 21 "fmt" 22 "io" 23 "math/big" 24 "net" 25 "net/http" 26 "strconv" 27 "strings" 28 "sync" 29 "sync/atomic" 30 "time" 31 32 compute "google.golang.org/api/compute/v1" 33 34 appsv1 "k8s.io/api/apps/v1" 35 v1 "k8s.io/api/core/v1" 36 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 37 "k8s.io/apimachinery/pkg/types" 38 "k8s.io/apimachinery/pkg/util/intstr" 39 utilnet "k8s.io/apimachinery/pkg/util/net" 40 "k8s.io/apimachinery/pkg/util/sets" 41 "k8s.io/apimachinery/pkg/util/wait" 42 clientset "k8s.io/client-go/kubernetes" 43 podutil "k8s.io/kubernetes/pkg/api/v1/pod" 44 e2eapps "k8s.io/kubernetes/test/e2e/apps" 45 "k8s.io/kubernetes/test/e2e/framework" 46 e2edaemonset "k8s.io/kubernetes/test/e2e/framework/daemonset" 47 e2edeployment "k8s.io/kubernetes/test/e2e/framework/deployment" 48 e2enetwork "k8s.io/kubernetes/test/e2e/framework/network" 49 e2enode "k8s.io/kubernetes/test/e2e/framework/node" 50 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 51 e2eoutput "k8s.io/kubernetes/test/e2e/framework/pod/output" 52 "k8s.io/kubernetes/test/e2e/framework/providers/gce" 53 e2erc "k8s.io/kubernetes/test/e2e/framework/rc" 54 e2eservice "k8s.io/kubernetes/test/e2e/framework/service" 55 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" 56 "k8s.io/kubernetes/test/e2e/network/common" 57 admissionapi "k8s.io/pod-security-admission/api" 58 netutils "k8s.io/utils/net" 59 utilpointer "k8s.io/utils/pointer" 60 61 "github.com/onsi/ginkgo/v2" 62 "github.com/onsi/gomega" 63 ) 64 65 // getInternalIP returns node internal IP 66 func getInternalIP(node *v1.Node) (string, error) { 67 for _, address := range node.Status.Addresses { 68 if address.Type == v1.NodeInternalIP && address.Address != "" { 69 return address.Address, nil 70 } 71 } 72 return "", fmt.Errorf("couldn't get the internal IP of host %s with addresses %v", node.Name, node.Status.Addresses) 73 } 74 75 // getSubnetPrefix returns a network prefix based on one of the workers' 76 // InternalIPs, adding a /16 or /64 mask depending on the IP family of the node. 77 // IMPORTANT: This assumes a flat network assigned to the nodes, which is common 78 // on cloud providers.
79 func getSubnetPrefix(ctx context.Context, c clientset.Interface) (*net.IPNet, error) { 80 node, err := getReadySchedulableWorkerNode(ctx, c) 81 if err != nil { 82 return nil, fmt.Errorf("error getting a ready schedulable worker Node, err: %w", err) 83 } 84 internalIP, err := getInternalIP(node) 85 if err != nil { 86 return nil, fmt.Errorf("error getting Node internal IP, err: %w", err) 87 } 88 ip := netutils.ParseIPSloppy(internalIP) 89 if ip == nil { 90 return nil, fmt.Errorf("invalid IP address format: %s", internalIP) 91 } 92 93 // if IPv6 return a net.IPNet with IP = ip and mask /64 94 cidrMask := net.CIDRMask(64, 128) 95 // if IPv4 return a net.IPNet with IP = ip and mask /16 96 if netutils.IsIPv4(ip) { 97 cidrMask = net.CIDRMask(16, 32) 98 } 99 return &net.IPNet{IP: ip.Mask(cidrMask), Mask: cidrMask}, nil 100 } 101 102 // getReadySchedulableWorkerNode gets a single worker node that is available for 103 // running pods. If there are no such available nodes, it returns an error. 104 func getReadySchedulableWorkerNode(ctx context.Context, c clientset.Interface) (*v1.Node, error) { 105 nodes, err := e2enode.GetReadySchedulableNodes(ctx, c) 106 if err != nil { 107 return nil, err 108 } 109 for i := range nodes.Items { 110 node := nodes.Items[i] 111 _, isMaster := node.Labels["node-role.kubernetes.io/master"] 112 _, isControlPlane := node.Labels["node-role.kubernetes.io/control-plane"] 113 if !isMaster && !isControlPlane { 114 return &node, nil 115 } 116 } 117 return nil, fmt.Errorf("there are currently no ready, schedulable worker nodes in the cluster") 118 } 119 120 var _ = common.SIGDescribe("LoadBalancers", func() { 121 f := framework.NewDefaultFramework("loadbalancers") 122 f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged 123 124 var cs clientset.Interface 125 var subnetPrefix *net.IPNet 126 var err error 127 128 ginkgo.BeforeEach(func(ctx context.Context) { 129 cs = f.ClientSet 130 subnetPrefix, err = getSubnetPrefix(ctx, cs) 131 framework.ExpectNoError(err) 132 }) 133 134 ginkgo.AfterEach(func(ctx context.Context) { 135 if ginkgo.CurrentSpecReport().Failed() { 136 DescribeSvc(f.Namespace.Name) 137 } 138 }) 139 140 f.It("should be able to change the type and ports of a TCP service", f.WithSlow(), func(ctx context.Context) { 141 // requires cloud load-balancer support 142 e2eskipper.SkipUnlessProviderIs("gce", "gke", "aws") 143 144 loadBalancerLagTimeout := e2eservice.LoadBalancerLagTimeoutDefault 145 if framework.ProviderIs("aws") { 146 loadBalancerLagTimeout = e2eservice.LoadBalancerLagTimeoutAWS 147 } 148 loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 149 150 // This test is more monolithic than we'd like because LB turnup can be 151 // very slow, so we lumped all the tests into one LB lifecycle.
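// Rough outline of the lifecycle exercised below: start as ClusterIP, switch to NodePort, then to LoadBalancer (optionally with a reserved static IP on GCE/GKE), mutate the NodePort and the service port, scale the backends to 0 and back to 1, and finally return to ClusterIP and verify the cloud LB is torn down.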
152 153 serviceName := "mutability-test" 154 ns1 := f.Namespace.Name // LB1 in ns1 on TCP 155 framework.Logf("namespace for TCP test: %s", ns1) 156 157 ginkgo.By("creating a TCP service " + serviceName + " with type=ClusterIP in namespace " + ns1) 158 tcpJig := e2eservice.NewTestJig(cs, ns1, serviceName) 159 tcpService, err := tcpJig.CreateTCPService(ctx, nil) 160 framework.ExpectNoError(err) 161 162 svcPort := int(tcpService.Spec.Ports[0].Port) 163 framework.Logf("service port TCP: %d", svcPort) 164 165 ginkgo.By("creating a pod to be part of the TCP service " + serviceName) 166 _, err = tcpJig.Run(ctx, nil) 167 framework.ExpectNoError(err) 168 169 execPod := e2epod.CreateExecPodOrFail(ctx, cs, ns1, "execpod", nil) 170 err = tcpJig.CheckServiceReachability(ctx, tcpService, execPod) 171 framework.ExpectNoError(err) 172 173 // Change the services to NodePort. 174 175 ginkgo.By("changing the TCP service to type=NodePort") 176 tcpService, err = tcpJig.UpdateService(ctx, func(s *v1.Service) { 177 s.Spec.Type = v1.ServiceTypeNodePort 178 }) 179 framework.ExpectNoError(err) 180 tcpNodePort := int(tcpService.Spec.Ports[0].NodePort) 181 framework.Logf("TCP node port: %d", tcpNodePort) 182 183 err = tcpJig.CheckServiceReachability(ctx, tcpService, execPod) 184 framework.ExpectNoError(err) 185 186 // Change the services to LoadBalancer. 187 188 // Here we test that LoadBalancers can receive static IP addresses. This isn't 189 // necessary, but is an additional feature this monolithic test checks. 190 requestedIP := "" 191 staticIPName := "" 192 if framework.ProviderIs("gce", "gke") { 193 ginkgo.By("creating a static load balancer IP") 194 staticIPName = fmt.Sprintf("e2e-external-lb-test-%s", framework.RunID) 195 gceCloud, err := gce.GetGCECloud() 196 framework.ExpectNoError(err, "failed to get GCE cloud provider") 197 198 err = gceCloud.ReserveRegionAddress(&compute.Address{Name: staticIPName}, gceCloud.Region()) 199 defer func() { 200 if staticIPName != "" { 201 // Release GCE static IP - this is not kube-managed and will not be automatically released. 
202 if err := gceCloud.DeleteRegionAddress(staticIPName, gceCloud.Region()); err != nil { 203 framework.Logf("failed to release static IP %s: %v", staticIPName, err) 204 } 205 } 206 }() 207 framework.ExpectNoError(err, "failed to create region address: %s", staticIPName) 208 reservedAddr, err := gceCloud.GetRegionAddress(staticIPName, gceCloud.Region()) 209 framework.ExpectNoError(err, "failed to get region address: %s", staticIPName) 210 211 requestedIP = reservedAddr.Address 212 framework.Logf("Allocated static load balancer IP: %s", requestedIP) 213 } 214 215 ginkgo.By("changing the TCP service to type=LoadBalancer") 216 _, err = tcpJig.UpdateService(ctx, func(s *v1.Service) { 217 s.Spec.LoadBalancerIP = requestedIP // will be "" if not applicable 218 s.Spec.Type = v1.ServiceTypeLoadBalancer 219 }) 220 framework.ExpectNoError(err) 221 222 ginkgo.By("waiting for the TCP service to have a load balancer") 223 // Wait for the load balancer to be created asynchronously 224 tcpService, err = tcpJig.WaitForLoadBalancer(ctx, loadBalancerCreateTimeout) 225 framework.ExpectNoError(err) 226 if int(tcpService.Spec.Ports[0].NodePort) != tcpNodePort { 227 framework.Failf("TCP Spec.Ports[0].NodePort changed (%d -> %d) when not expected", tcpNodePort, tcpService.Spec.Ports[0].NodePort) 228 } 229 if requestedIP != "" && e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0]) != requestedIP { 230 framework.Failf("unexpected TCP Status.LoadBalancer.Ingress (expected %s, got %s)", requestedIP, e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0])) 231 } 232 tcpIngressIP := e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0]) 233 framework.Logf("TCP load balancer: %s", tcpIngressIP) 234 235 if framework.ProviderIs("gce", "gke") { 236 // Do this as early as possible, which overrides the `defer` above. 237 // This is mostly out of fear of leaking the IP in a timeout case 238 // (as of this writing we're not 100% sure where the leaks are 239 // coming from, so this is first-aid rather than surgery). 240 ginkgo.By("demoting the static IP to ephemeral") 241 if staticIPName != "" { 242 gceCloud, err := gce.GetGCECloud() 243 framework.ExpectNoError(err, "failed to get GCE cloud provider") 244 // Deleting it after it is attached "demotes" it to an 245 // ephemeral IP, which can be auto-released. 246 if err := gceCloud.DeleteRegionAddress(staticIPName, gceCloud.Region()); err != nil { 247 framework.Failf("failed to release static IP %s: %v", staticIPName, err) 248 } 249 staticIPName = "" 250 } 251 } 252 253 err = tcpJig.CheckServiceReachability(ctx, tcpService, execPod) 254 framework.ExpectNoError(err) 255 256 ginkgo.By("hitting the TCP service's LoadBalancer") 257 e2eservice.TestReachableHTTP(ctx, tcpIngressIP, svcPort, loadBalancerLagTimeout) 258 259 // Change the services' node ports. 
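// Only the NodePort is changed here; the LoadBalancer ingress IP is expected to stay the same, which the assertions below verify before re-testing reachability through the LB.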
260 261 ginkgo.By("changing the TCP service's NodePort") 262 tcpService, err = tcpJig.ChangeServiceNodePort(ctx, tcpNodePort) 263 framework.ExpectNoError(err) 264 tcpNodePortOld := tcpNodePort 265 tcpNodePort = int(tcpService.Spec.Ports[0].NodePort) 266 if tcpNodePort == tcpNodePortOld { 267 framework.Failf("TCP Spec.Ports[0].NodePort (%d) did not change", tcpNodePort) 268 } 269 if e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0]) != tcpIngressIP { 270 framework.Failf("TCP Status.LoadBalancer.Ingress changed (%s -> %s) when not expected", tcpIngressIP, e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0])) 271 } 272 framework.Logf("TCP node port: %d", tcpNodePort) 273 274 ginkgo.By("hitting the TCP service's LoadBalancer") 275 e2eservice.TestReachableHTTP(ctx, tcpIngressIP, svcPort, loadBalancerLagTimeout) 276 277 // Change the services' main ports. 278 279 ginkgo.By("changing the TCP service's port") 280 tcpService, err = tcpJig.UpdateService(ctx, func(s *v1.Service) { 281 s.Spec.Ports[0].Port++ 282 }) 283 framework.ExpectNoError(err) 284 svcPortOld := svcPort 285 svcPort = int(tcpService.Spec.Ports[0].Port) 286 if svcPort == svcPortOld { 287 framework.Failf("TCP Spec.Ports[0].Port (%d) did not change", svcPort) 288 } 289 if int(tcpService.Spec.Ports[0].NodePort) != tcpNodePort { 290 framework.Failf("TCP Spec.Ports[0].NodePort (%d) changed", tcpService.Spec.Ports[0].NodePort) 291 } 292 if e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0]) != tcpIngressIP { 293 framework.Failf("TCP Status.LoadBalancer.Ingress changed (%s -> %s) when not expected", tcpIngressIP, e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0])) 294 } 295 296 framework.Logf("service port TCP: %d", svcPort) 297 298 ginkgo.By("hitting the TCP service's LoadBalancer") 299 e2eservice.TestReachableHTTP(ctx, tcpIngressIP, svcPort, loadBalancerCreateTimeout) 300 301 ginkgo.By("Scaling the pods to 0") 302 err = tcpJig.Scale(ctx, 0) 303 framework.ExpectNoError(err) 304 305 ginkgo.By("looking for ICMP REJECT on the TCP service's LoadBalancer") 306 testRejectedHTTP(tcpIngressIP, svcPort, loadBalancerCreateTimeout) 307 308 ginkgo.By("Scaling the pods to 1") 309 err = tcpJig.Scale(ctx, 1) 310 framework.ExpectNoError(err) 311 312 ginkgo.By("hitting the TCP service's LoadBalancer") 313 e2eservice.TestReachableHTTP(ctx, tcpIngressIP, svcPort, loadBalancerCreateTimeout) 314 315 // Change the services back to ClusterIP. 
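// Going back to ClusterIP should clear the allocated NodePort and deprovision the cloud load balancer; the code below checks the cleared NodePort, waits for the LB to be destroyed, and then confirms the old ingress IP no longer answers.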
316 317 ginkgo.By("changing TCP service back to type=ClusterIP") 318 tcpReadback, err := tcpJig.UpdateService(ctx, func(s *v1.Service) { 319 s.Spec.Type = v1.ServiceTypeClusterIP 320 }) 321 framework.ExpectNoError(err) 322 if tcpReadback.Spec.Ports[0].NodePort != 0 { 323 framework.Fail("TCP Spec.Ports[0].NodePort was not cleared") 324 } 325 // Wait for the load balancer to be destroyed asynchronously 326 _, err = tcpJig.WaitForLoadBalancerDestroy(ctx, tcpIngressIP, svcPort, loadBalancerCreateTimeout) 327 framework.ExpectNoError(err) 328 329 ginkgo.By("checking the TCP LoadBalancer is closed") 330 testNotReachableHTTP(tcpIngressIP, svcPort, loadBalancerLagTimeout) 331 }) 332 333 f.It("should be able to change the type and ports of a UDP service", f.WithSlow(), func(ctx context.Context) { 334 // requires cloud load-balancer support 335 e2eskipper.SkipUnlessProviderIs("gce", "gke") 336 337 loadBalancerLagTimeout := e2eservice.LoadBalancerLagTimeoutDefault 338 loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 339 340 // This test is more monolithic than we'd like because LB turnup can be 341 // very slow, so we lumped all the tests into one LB lifecycle. 342 343 serviceName := "mutability-test" 344 ns2 := f.Namespace.Name // LB1 in ns2 on UDP 345 framework.Logf("namespace for UDP test: %s", ns2) 346 347 ginkgo.By("creating a UDP service " + serviceName + " with type=ClusterIP in namespace " + ns2) 348 udpJig := e2eservice.NewTestJig(cs, ns2, serviceName) 349 udpService, err := udpJig.CreateUDPService(ctx, nil) 350 framework.ExpectNoError(err) 351 352 svcPort := int(udpService.Spec.Ports[0].Port) 353 framework.Logf("service port UDP: %d", svcPort) 354 355 ginkgo.By("creating a pod to be part of the UDP service " + serviceName) 356 _, err = udpJig.Run(ctx, nil) 357 framework.ExpectNoError(err) 358 359 execPod := e2epod.CreateExecPodOrFail(ctx, cs, ns2, "execpod", nil) 360 err = udpJig.CheckServiceReachability(ctx, udpService, execPod) 361 framework.ExpectNoError(err) 362 363 // Change the services to NodePort. 364 365 ginkgo.By("changing the UDP service to type=NodePort") 366 udpService, err = udpJig.UpdateService(ctx, func(s *v1.Service) { 367 s.Spec.Type = v1.ServiceTypeNodePort 368 }) 369 framework.ExpectNoError(err) 370 udpNodePort := int(udpService.Spec.Ports[0].NodePort) 371 framework.Logf("UDP node port: %d", udpNodePort) 372 373 err = udpJig.CheckServiceReachability(ctx, udpService, execPod) 374 framework.ExpectNoError(err) 375 376 // Change the services to LoadBalancer. 377 378 // Here we test that LoadBalancers can receive static IP addresses. This isn't 379 // necessary, but is an additional feature this monolithic test checks. 380 requestedIP := "" 381 staticIPName := "" 382 ginkgo.By("creating a static load balancer IP") 383 staticIPName = fmt.Sprintf("e2e-external-lb-test-%s", framework.RunID) 384 gceCloud, err := gce.GetGCECloud() 385 framework.ExpectNoError(err, "failed to get GCE cloud provider") 386 387 err = gceCloud.ReserveRegionAddress(&compute.Address{Name: staticIPName}, gceCloud.Region()) 388 defer func() { 389 if staticIPName != "" { 390 // Release GCE static IP - this is not kube-managed and will not be automatically released.
391 if err := gceCloud.DeleteRegionAddress(staticIPName, gceCloud.Region()); err != nil { 392 framework.Logf("failed to release static IP %s: %v", staticIPName, err) 393 } 394 } 395 }() 396 framework.ExpectNoError(err, "failed to create region address: %s", staticIPName) 397 reservedAddr, err := gceCloud.GetRegionAddress(staticIPName, gceCloud.Region()) 398 framework.ExpectNoError(err, "failed to get region address: %s", staticIPName) 399 400 requestedIP = reservedAddr.Address 401 framework.Logf("Allocated static load balancer IP: %s", requestedIP) 402 403 ginkgo.By("changing the UDP service to type=LoadBalancer") 404 _, err = udpJig.UpdateService(ctx, func(s *v1.Service) { 405 s.Spec.Type = v1.ServiceTypeLoadBalancer 406 }) 407 framework.ExpectNoError(err) 408 409 // Do this as early as possible, which overrides the `defer` above. 410 // This is mostly out of fear of leaking the IP in a timeout case 411 // (as of this writing we're not 100% sure where the leaks are 412 // coming from, so this is first-aid rather than surgery). 413 ginkgo.By("demoting the static IP to ephemeral") 414 if staticIPName != "" { 415 gceCloud, err := gce.GetGCECloud() 416 framework.ExpectNoError(err, "failed to get GCE cloud provider") 417 // Deleting it after it is attached "demotes" it to an 418 // ephemeral IP, which can be auto-released. 419 if err := gceCloud.DeleteRegionAddress(staticIPName, gceCloud.Region()); err != nil { 420 framework.Failf("failed to release static IP %s: %v", staticIPName, err) 421 } 422 staticIPName = "" 423 } 424 425 var udpIngressIP string 426 ginkgo.By("waiting for the UDP service to have a load balancer") 427 // 2nd one should be faster since they ran in parallel. 428 udpService, err = udpJig.WaitForLoadBalancer(ctx, loadBalancerCreateTimeout) 429 framework.ExpectNoError(err) 430 if int(udpService.Spec.Ports[0].NodePort) != udpNodePort { 431 framework.Failf("UDP Spec.Ports[0].NodePort changed (%d -> %d) when not expected", udpNodePort, udpService.Spec.Ports[0].NodePort) 432 } 433 udpIngressIP = e2eservice.GetIngressPoint(&udpService.Status.LoadBalancer.Ingress[0]) 434 framework.Logf("UDP load balancer: %s", udpIngressIP) 435 436 err = udpJig.CheckServiceReachability(ctx, udpService, execPod) 437 framework.ExpectNoError(err) 438 439 ginkgo.By("hitting the UDP service's LoadBalancer") 440 testReachableUDP(udpIngressIP, svcPort, loadBalancerLagTimeout) 441 442 // Change the services' node ports. 443 444 ginkgo.By("changing the UDP service's NodePort") 445 udpService, err = udpJig.ChangeServiceNodePort(ctx, udpNodePort) 446 framework.ExpectNoError(err) 447 udpNodePortOld := udpNodePort 448 udpNodePort = int(udpService.Spec.Ports[0].NodePort) 449 if udpNodePort == udpNodePortOld { 450 framework.Failf("UDP Spec.Ports[0].NodePort (%d) did not change", udpNodePort) 451 } 452 if e2eservice.GetIngressPoint(&udpService.Status.LoadBalancer.Ingress[0]) != udpIngressIP { 453 framework.Failf("UDP Status.LoadBalancer.Ingress changed (%s -> %s) when not expected", udpIngressIP, e2eservice.GetIngressPoint(&udpService.Status.LoadBalancer.Ingress[0])) 454 } 455 framework.Logf("UDP node port: %d", udpNodePort) 456 457 err = udpJig.CheckServiceReachability(ctx, udpService, execPod) 458 framework.ExpectNoError(err) 459 460 ginkgo.By("hitting the UDP service's LoadBalancer") 461 testReachableUDP(udpIngressIP, svcPort, loadBalancerLagTimeout) 462 463 // Change the services' main ports. 
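// Only Spec.Ports[0].Port is bumped below; the NodePort and the LB ingress IP are expected to remain unchanged, and the service should become reachable on the new port both via the NodePort and via the LB.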
464 465 ginkgo.By("changing the UDP service's port") 466 udpService, err = udpJig.UpdateService(ctx, func(s *v1.Service) { 467 s.Spec.Ports[0].Port++ 468 }) 469 framework.ExpectNoError(err) 470 svcPortOld := svcPort 471 svcPort = int(udpService.Spec.Ports[0].Port) 472 if svcPort == svcPortOld { 473 framework.Failf("UDP Spec.Ports[0].Port (%d) did not change", svcPort) 474 } 475 if int(udpService.Spec.Ports[0].NodePort) != udpNodePort { 476 framework.Failf("UDP Spec.Ports[0].NodePort (%d) changed", udpService.Spec.Ports[0].NodePort) 477 } 478 if e2eservice.GetIngressPoint(&udpService.Status.LoadBalancer.Ingress[0]) != udpIngressIP { 479 framework.Failf("UDP Status.LoadBalancer.Ingress changed (%s -> %s) when not expected", udpIngressIP, e2eservice.GetIngressPoint(&udpService.Status.LoadBalancer.Ingress[0])) 480 } 481 482 framework.Logf("service port UDP: %d", svcPort) 483 484 ginkgo.By("hitting the UDP service's NodePort") 485 err = udpJig.CheckServiceReachability(ctx, udpService, execPod) 486 framework.ExpectNoError(err) 487 488 ginkgo.By("hitting the UDP service's LoadBalancer") 489 testReachableUDP(udpIngressIP, svcPort, loadBalancerCreateTimeout) 490 491 ginkgo.By("Scaling the pods to 0") 492 err = udpJig.Scale(ctx, 0) 493 framework.ExpectNoError(err) 494 495 ginkgo.By("looking for ICMP REJECT on the UDP service's LoadBalancer") 496 testRejectedUDP(udpIngressIP, svcPort, loadBalancerCreateTimeout) 497 498 ginkgo.By("Scaling the pods to 1") 499 err = udpJig.Scale(ctx, 1) 500 framework.ExpectNoError(err) 501 502 ginkgo.By("hitting the UDP service's NodePort") 503 err = udpJig.CheckServiceReachability(ctx, udpService, execPod) 504 framework.ExpectNoError(err) 505 506 ginkgo.By("hitting the UDP service's LoadBalancer") 507 testReachableUDP(udpIngressIP, svcPort, loadBalancerCreateTimeout) 508 509 // Change the services back to ClusterIP. 
510 511 ginkgo.By("changing UDP service back to type=ClusterIP") 512 udpReadback, err := udpJig.UpdateService(ctx, func(s *v1.Service) { 513 s.Spec.Type = v1.ServiceTypeClusterIP 514 }) 515 framework.ExpectNoError(err) 516 if udpReadback.Spec.Ports[0].NodePort != 0 { 517 framework.Fail("UDP Spec.Ports[0].NodePort was not cleared") 518 } 519 // Wait for the load balancer to be destroyed asynchronously 520 _, err = udpJig.WaitForLoadBalancerDestroy(ctx, udpIngressIP, svcPort, loadBalancerCreateTimeout) 521 framework.ExpectNoError(err) 522 523 ginkgo.By("checking the UDP LoadBalancer is closed") 524 testNotReachableUDP(udpIngressIP, svcPort, loadBalancerLagTimeout) 525 }) 526 527 f.It("should only allow access from service loadbalancer source ranges", f.WithSlow(), func(ctx context.Context) { 528 // this feature is currently supported only on GCE/GKE/AWS/AZURE 529 e2eskipper.SkipUnlessProviderIs("gce", "gke", "aws", "azure") 530 531 loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 532 533 namespace := f.Namespace.Name 534 serviceName := "lb-sourcerange" 535 jig := e2eservice.NewTestJig(cs, namespace, serviceName) 536 537 ginkgo.By("Prepare allow source ips") 538 // prepare the exec pods 539 // acceptPod is allowed to access the load balancer 540 acceptPod := e2epod.CreateExecPodOrFail(ctx, cs, namespace, "execpod-accept", nil) 541 dropPod := e2epod.CreateExecPodOrFail(ctx, cs, namespace, "execpod-drop", nil) 542 543 ginkgo.By("creating a pod to be part of the service " + serviceName) 544 // This container is an nginx container listening on port 80 545 // See kubernetes/contrib/ingress/echoheaders/nginx.conf for content of response 546 _, err := jig.Run(ctx, nil) 547 framework.ExpectNoError(err) 548 // Make sure acceptPod is running. There is a chance the pod was terminated for unexpected reasons. 549 acceptPod, err = cs.CoreV1().Pods(namespace).Get(ctx, acceptPod.Name, metav1.GetOptions{}) 550 framework.ExpectNoError(err, "Unable to get pod %s", acceptPod.Name) 551 gomega.Expect(acceptPod.Status.Phase).To(gomega.Equal(v1.PodRunning)) 552 gomega.Expect(acceptPod.Status.PodIP).ToNot(gomega.BeEmpty()) 553 554 // Create a LoadBalancer service whose source range allows only acceptPod 555 svc, err := jig.CreateTCPService(ctx, func(svc *v1.Service) { 556 svc.Spec.Type = v1.ServiceTypeLoadBalancer 557 svc.Spec.LoadBalancerSourceRanges = []string{acceptPod.Status.PodIP + "/32"} 558 }) 559 framework.ExpectNoError(err) 560 561 ginkgo.DeferCleanup(func(ctx context.Context) { 562 ginkgo.By("Clean up loadbalancer service") 563 e2eservice.WaitForServiceDeletedWithFinalizer(ctx, cs, svc.Namespace, svc.Name) 564 }) 565 566 svc, err = jig.WaitForLoadBalancer(ctx, loadBalancerCreateTimeout) 567 framework.ExpectNoError(err) 568 569 ginkgo.By("check reachability from different sources") 570 svcIP := e2eservice.GetIngressPoint(&svc.Status.LoadBalancer.Ingress[0]) 571 // We should wait until service changes are actually propagated in the cloud-provider, 572 // as this may take a significant amount of time, especially in large clusters. 573 // However, there is no way to tell whether they have already been programmed. 574 // So we resolve this by using loadBalancerCreateTimeout, which takes cluster size into account. 575 checkReachabilityFromPod(true, loadBalancerCreateTimeout, namespace, acceptPod.Name, svcIP) 576 checkReachabilityFromPod(false, loadBalancerCreateTimeout, namespace, dropPod.Name, svcIP) 577 578 // Make sure dropPod is running. There is a chance the pod was terminated for unexpected reasons. 579 dropPod, err = cs.CoreV1().Pods(namespace).Get(ctx, dropPod.Name, metav1.GetOptions{}) 580 framework.ExpectNoError(err, "Unable to get pod %s", dropPod.Name) 581 gomega.Expect(dropPod.Status.Phase).To(gomega.Equal(v1.PodRunning)) 582 gomega.Expect(dropPod.Status.PodIP).ToNot(gomega.BeEmpty()) 583 584 ginkgo.By("Update service LoadBalancerSourceRange and check reachability") 585 _, err = jig.UpdateService(ctx, func(svc *v1.Service) { 586 // only allow access from dropPod 587 svc.Spec.LoadBalancerSourceRanges = []string{dropPod.Status.PodIP + "/32"} 588 }) 589 framework.ExpectNoError(err) 590 591 // We should wait until service changes are actually propagated, as this may take 592 // a significant amount of time, especially in large clusters. 593 // However, there is no way to tell whether they have already been programmed. 594 // So we resolve this by using loadBalancerCreateTimeout, which takes cluster size into account. 595 checkReachabilityFromPod(false, loadBalancerCreateTimeout, namespace, acceptPod.Name, svcIP) 596 checkReachabilityFromPod(true, loadBalancerCreateTimeout, namespace, dropPod.Name, svcIP) 597 598 ginkgo.By("Delete LoadBalancerSourceRange field and check reachability") 599 _, err = jig.UpdateService(ctx, func(svc *v1.Service) { 600 svc.Spec.LoadBalancerSourceRanges = nil 601 }) 602 framework.ExpectNoError(err) 603 // We should wait until service changes are actually propagated, as this may take 604 // a significant amount of time, especially in large clusters. 605 // However, there is no way to tell whether they have already been programmed. 606 // So we resolve this by using loadBalancerCreateTimeout, which takes cluster size into account. 607 checkReachabilityFromPod(true, loadBalancerCreateTimeout, namespace, acceptPod.Name, svcIP) 608 checkReachabilityFromPod(true, loadBalancerCreateTimeout, namespace, dropPod.Name, svcIP) 609 }) 610 611 f.It("should be able to create an internal type load balancer", f.WithSlow(), func(ctx context.Context) { 612 e2eskipper.SkipUnlessProviderIs("azure", "gke", "gce") 613 614 createTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 615 pollInterval := framework.Poll * 10 616 617 namespace := f.Namespace.Name 618 serviceName := "lb-internal" 619 jig := e2eservice.NewTestJig(cs, namespace, serviceName) 620 621 ginkgo.By("creating pod to be part of service " + serviceName) 622 _, err := jig.Run(ctx, nil) 623 framework.ExpectNoError(err) 624 625 enableILB, disableILB := enableAndDisableInternalLB() 626 627 isInternalEndpoint := func(lbIngress *v1.LoadBalancerIngress) bool { 628 ingressEndpoint := e2eservice.GetIngressPoint(lbIngress) 629 ingressIP := netutils.ParseIPSloppy(ingressEndpoint) 630 if ingressIP == nil { 631 framework.Failf("invalid ingressEndpoint IP address format: %s", ingressEndpoint) 632 } 633 // Needs update for providers using hostname as endpoint.
634 return subnetPrefix.Contains(ingressIP) 635 } 636 637 ginkgo.By("creating a service with type LoadBalancer and cloud specific Internal-LB annotation enabled") 638 svc, err := jig.CreateTCPService(ctx, func(svc *v1.Service) { 639 svc.Spec.Type = v1.ServiceTypeLoadBalancer 640 enableILB(svc) 641 }) 642 framework.ExpectNoError(err) 643 644 ginkgo.DeferCleanup(func(ctx context.Context) { 645 ginkgo.By("Clean up loadbalancer service") 646 e2eservice.WaitForServiceDeletedWithFinalizer(ctx, cs, svc.Namespace, svc.Name) 647 }) 648 649 svc, err = jig.WaitForLoadBalancer(ctx, createTimeout) 650 framework.ExpectNoError(err) 651 lbIngress := &svc.Status.LoadBalancer.Ingress[0] 652 svcPort := int(svc.Spec.Ports[0].Port) 653 // should have an internal IP. 654 if !isInternalEndpoint(lbIngress) { 655 framework.Failf("lbIngress %v doesn't have an internal IP", lbIngress) 656 } 657 658 // ILBs are not accessible from the test orchestrator, so it's necessary to use 659 // a pod to test the service. 660 ginkgo.By("hitting the internal load balancer from pod") 661 framework.Logf("creating pod with host network") 662 hostExec := launchHostExecPod(ctx, f.ClientSet, f.Namespace.Name, "ilb-host-exec") 663 664 framework.Logf("Waiting up to %v for service %q's internal LB to respond to requests", createTimeout, serviceName) 665 tcpIngressIP := e2eservice.GetIngressPoint(lbIngress) 666 if pollErr := wait.PollImmediate(pollInterval, createTimeout, func() (bool, error) { 667 cmd := fmt.Sprintf(`curl -m 5 'http://%v:%v/echo?msg=hello'`, tcpIngressIP, svcPort) 668 stdout, err := e2eoutput.RunHostCmd(hostExec.Namespace, hostExec.Name, cmd) 669 if err != nil { 670 framework.Logf("error curling; stdout: %v. err: %v", stdout, err) 671 return false, nil 672 } 673 674 if !strings.Contains(stdout, "hello") { 675 framework.Logf("Expected output to contain 'hello', got %q; retrying...", stdout) 676 return false, nil 677 } 678 679 framework.Logf("Successful curl; stdout: %v", stdout) 680 return true, nil 681 }); pollErr != nil { 682 framework.Failf("Failed to hit ILB IP, err: %v", pollErr) 683 } 684 685 ginkgo.By("switching to external type LoadBalancer") 686 svc, err = jig.UpdateService(ctx, func(svc *v1.Service) { 687 disableILB(svc) 688 }) 689 framework.ExpectNoError(err) 690 framework.Logf("Waiting up to %v for service %q to have an external LoadBalancer", createTimeout, serviceName) 691 if pollErr := wait.PollImmediate(pollInterval, createTimeout, func() (bool, error) { 692 svc, err := cs.CoreV1().Services(namespace).Get(ctx, serviceName, metav1.GetOptions{}) 693 if err != nil { 694 return false, err 695 } 696 lbIngress = &svc.Status.LoadBalancer.Ingress[0] 697 return !isInternalEndpoint(lbIngress), nil 698 }); pollErr != nil { 699 framework.Failf("Loadbalancer IP not changed to external.") 700 } 701 // should have an external IP. 702 gomega.Expect(isInternalEndpoint(lbIngress)).To(gomega.BeFalse()) 703 704 ginkgo.By("hitting the external load balancer") 705 framework.Logf("Waiting up to %v for service %q's external LB to respond to requests", createTimeout, serviceName) 706 tcpIngressIP = e2eservice.GetIngressPoint(lbIngress) 707 e2eservice.TestReachableHTTP(ctx, tcpIngressIP, svcPort, e2eservice.LoadBalancerLagTimeoutDefault) 708 709 // GCE cannot test a specific IP because the test may not own it. This cloud specific condition 710 // will be removed when GCP supports similar functionality.
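// On Azure, the block below derives a static internal IP that should fall inside the node subnet by adding a fixed offset (0.0.11.11) to the subnet base address, then requests it via Spec.LoadBalancerIP when switching back to an internal LB.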
711 if framework.ProviderIs("azure") { 712 ginkgo.By("switching back to internal type LoadBalancer, with static IP specified.") 713 // For a cluster created with CAPZ, node-subnet may not be "10.240.0.0/16", e.g. "10.1.0.0/16". 714 base := netutils.BigForIP(subnetPrefix.IP) 715 offset := big.NewInt(0).SetBytes(netutils.ParseIPSloppy("0.0.11.11").To4()).Int64() 716 717 internalStaticIP := netutils.AddIPOffset(base, int(offset)).String() 718 719 svc, err = jig.UpdateService(ctx, func(svc *v1.Service) { 720 svc.Spec.LoadBalancerIP = internalStaticIP 721 enableILB(svc) 722 }) 723 framework.ExpectNoError(err) 724 framework.Logf("Waiting up to %v for service %q to have an internal LoadBalancer", createTimeout, serviceName) 725 if pollErr := wait.PollImmediate(pollInterval, createTimeout, func() (bool, error) { 726 svc, err := cs.CoreV1().Services(namespace).Get(ctx, serviceName, metav1.GetOptions{}) 727 if err != nil { 728 return false, err 729 } 730 lbIngress = &svc.Status.LoadBalancer.Ingress[0] 731 return isInternalEndpoint(lbIngress), nil 732 }); pollErr != nil { 733 framework.Failf("Loadbalancer IP not changed to internal.") 734 } 735 // should have the given static internal IP. 736 gomega.Expect(e2eservice.GetIngressPoint(lbIngress)).To(gomega.Equal(internalStaticIP)) 737 } 738 }) 739 740 // [LinuxOnly]: Windows does not support session affinity. 741 f.It("should have session affinity work for LoadBalancer service with ESIPP on", f.WithSlow(), "[LinuxOnly]", func(ctx context.Context) { 742 // L4 load balancer affinity `ClientIP` is not supported on AWS ELB. 743 e2eskipper.SkipIfProviderIs("aws") 744 745 svc := getServeHostnameService("affinity-lb-esipp") 746 svc.Spec.Type = v1.ServiceTypeLoadBalancer 747 svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyLocal 748 execAffinityTestForLBService(ctx, f, cs, svc) 749 }) 750 751 // [LinuxOnly]: Windows does not support session affinity. 752 f.It("should be able to switch session affinity for LoadBalancer service with ESIPP on", f.WithSlow(), "[LinuxOnly]", func(ctx context.Context) { 753 // L4 load balancer affinity `ClientIP` is not supported on AWS ELB. 754 e2eskipper.SkipIfProviderIs("aws") 755 756 svc := getServeHostnameService("affinity-lb-esipp-transition") 757 svc.Spec.Type = v1.ServiceTypeLoadBalancer 758 svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyLocal 759 execAffinityTestForLBServiceWithTransition(ctx, f, cs, svc) 760 }) 761 762 // [LinuxOnly]: Windows does not support session affinity. 763 f.It("should have session affinity work for LoadBalancer service with ESIPP off", f.WithSlow(), "[LinuxOnly]", func(ctx context.Context) { 764 // L4 load balancer affinity `ClientIP` is not supported on AWS ELB. 765 e2eskipper.SkipIfProviderIs("aws") 766 767 svc := getServeHostnameService("affinity-lb") 768 svc.Spec.Type = v1.ServiceTypeLoadBalancer 769 svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyCluster 770 execAffinityTestForLBService(ctx, f, cs, svc) 771 }) 772 773 // [LinuxOnly]: Windows does not support session affinity. 774 f.It("should be able to switch session affinity for LoadBalancer service with ESIPP off", f.WithSlow(), "[LinuxOnly]", func(ctx context.Context) { 775 // L4 load balancer affinity `ClientIP` is not supported on AWS ELB.
776 e2eskipper.SkipIfProviderIs("aws") 777 778 svc := getServeHostnameService("affinity-lb-transition") 779 svc.Spec.Type = v1.ServiceTypeLoadBalancer 780 svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyCluster 781 execAffinityTestForLBServiceWithTransition(ctx, f, cs, svc) 782 }) 783 784 // This test verifies that the service load balancer cleanup finalizer is properly 785 // handled during the service lifecycle. 786 // 1. Create service with type=LoadBalancer. Finalizer should be added. 787 // 2. Update service to type=ClusterIP. Finalizer should be removed. 788 // 3. Update service to type=LoadBalancer. Finalizer should be added. 789 // 4. Delete service with type=LoadBalancer. Finalizer should be removed. 790 f.It("should handle load balancer cleanup finalizer for service", f.WithSlow(), func(ctx context.Context) { 791 jig := e2eservice.NewTestJig(cs, f.Namespace.Name, "lb-finalizer") 792 793 ginkgo.By("Create load balancer service") 794 svc, err := jig.CreateTCPService(ctx, func(svc *v1.Service) { 795 svc.Spec.Type = v1.ServiceTypeLoadBalancer 796 }) 797 framework.ExpectNoError(err) 798 799 ginkgo.DeferCleanup(func(ctx context.Context) { 800 ginkgo.By("Check that service can be deleted with finalizer") 801 e2eservice.WaitForServiceDeletedWithFinalizer(ctx, cs, svc.Namespace, svc.Name) 802 }) 803 804 ginkgo.By("Wait for load balancer to serve traffic") 805 svc, err = jig.WaitForLoadBalancer(ctx, e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs)) 806 framework.ExpectNoError(err) 807 808 ginkgo.By("Check if finalizer is present on service with type=LoadBalancer") 809 e2eservice.WaitForServiceUpdatedWithFinalizer(ctx, cs, svc.Namespace, svc.Name, true) 810 811 ginkgo.By("Check if finalizer is removed on service after it is changed to type=ClusterIP") 812 err = jig.ChangeServiceType(ctx, v1.ServiceTypeClusterIP, e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs)) 813 framework.ExpectNoError(err) 814 e2eservice.WaitForServiceUpdatedWithFinalizer(ctx, cs, svc.Namespace, svc.Name, false) 815 816 ginkgo.By("Check if finalizer is added back to service after it is changed to type=LoadBalancer") 817 err = jig.ChangeServiceType(ctx, v1.ServiceTypeLoadBalancer, e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs)) 818 framework.ExpectNoError(err) 819 e2eservice.WaitForServiceUpdatedWithFinalizer(ctx, cs, svc.Namespace, svc.Name, true) 820 }) 821 822 f.It("should be able to create LoadBalancer Service without NodePort and change it", f.WithSlow(), func(ctx context.Context) { 823 // requires cloud load-balancer support 824 e2eskipper.SkipUnlessProviderIs("gce", "gke", "aws") 825 826 loadBalancerLagTimeout := e2eservice.LoadBalancerLagTimeoutDefault 827 if framework.ProviderIs("aws") { 828 loadBalancerLagTimeout = e2eservice.LoadBalancerLagTimeoutAWS 829 } 830 loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 831 832 // This test is more monolithic than we'd like because LB turnup can be 833 // very slow, so we lumped all the tests into one LB lifecycle.
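// The twist in this variant is Spec.AllocateLoadBalancerNodePorts=false: the LoadBalancer should come up with no NodePort allocated, and flipping the field back to true later should allocate one without changing the ingress IP.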
834 835 serviceName := "reallocate-nodeport-test" 836 ns1 := f.Namespace.Name // LB1 in ns1 on TCP 837 framework.Logf("namespace for TCP test: %s", ns1) 838 839 ginkgo.By("creating a TCP service " + serviceName + " with type=ClusterIP in namespace " + ns1) 840 tcpJig := e2eservice.NewTestJig(cs, ns1, serviceName) 841 tcpService, err := tcpJig.CreateTCPService(ctx, nil) 842 framework.ExpectNoError(err) 843 844 svcPort := int(tcpService.Spec.Ports[0].Port) 845 framework.Logf("service port TCP: %d", svcPort) 846 847 ginkgo.By("creating a pod to be part of the TCP service " + serviceName) 848 _, err = tcpJig.Run(ctx, nil) 849 framework.ExpectNoError(err) 850 851 // Change the services to LoadBalancer. 852 853 // Here we test that LoadBalancers can receive static IP addresses. This isn't 854 // necessary, but is an additional feature this monolithic test checks. 855 requestedIP := "" 856 staticIPName := "" 857 if framework.ProviderIs("gce", "gke") { 858 ginkgo.By("creating a static load balancer IP") 859 staticIPName = fmt.Sprintf("e2e-external-lb-test-%s", framework.RunID) 860 gceCloud, err := gce.GetGCECloud() 861 framework.ExpectNoError(err, "failed to get GCE cloud provider") 862 863 err = gceCloud.ReserveRegionAddress(&compute.Address{Name: staticIPName}, gceCloud.Region()) 864 ginkgo.DeferCleanup(func(ctx context.Context) { 865 if staticIPName != "" { 866 // Release GCE static IP - this is not kube-managed and will not be automatically released. 867 if err := gceCloud.DeleteRegionAddress(staticIPName, gceCloud.Region()); err != nil { 868 framework.Logf("failed to release static IP %s: %v", staticIPName, err) 869 } 870 } 871 }) 872 framework.ExpectNoError(err, "failed to create region address: %s", staticIPName) 873 reservedAddr, err := gceCloud.GetRegionAddress(staticIPName, gceCloud.Region()) 874 framework.ExpectNoError(err, "failed to get region address: %s", staticIPName) 875 876 requestedIP = reservedAddr.Address 877 framework.Logf("Allocated static load balancer IP: %s", requestedIP) 878 } 879 880 ginkgo.By("changing the TCP service to type=LoadBalancer") 881 _, err = tcpJig.UpdateService(ctx, func(s *v1.Service) { 882 s.Spec.LoadBalancerIP = requestedIP // will be "" if not applicable 883 s.Spec.Type = v1.ServiceTypeLoadBalancer 884 s.Spec.AllocateLoadBalancerNodePorts = utilpointer.BoolPtr(false) 885 }) 886 framework.ExpectNoError(err) 887 888 ginkgo.By("waiting for the TCP service to have a load balancer") 889 // Wait for the load balancer to be created asynchronously 890 tcpService, err = tcpJig.WaitForLoadBalancer(ctx, loadBalancerCreateTimeout) 891 framework.ExpectNoError(err) 892 if int(tcpService.Spec.Ports[0].NodePort) != 0 { 893 framework.Failf("TCP Spec.Ports[0].NodePort allocated %d when not expected", tcpService.Spec.Ports[0].NodePort) 894 } 895 if requestedIP != "" && e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0]) != requestedIP { 896 framework.Failf("unexpected TCP Status.LoadBalancer.Ingress (expected %s, got %s)", requestedIP, e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0])) 897 } 898 tcpIngressIP := e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0]) 899 framework.Logf("TCP load balancer: %s", tcpIngressIP) 900 901 if framework.ProviderIs("gce", "gke") { 902 // Do this as early as possible, which overrides the `defer` above. 
903 // This is mostly out of fear of leaking the IP in a timeout case 904 // (as of this writing we're not 100% sure where the leaks are 905 // coming from, so this is first-aid rather than surgery). 906 ginkgo.By("demoting the static IP to ephemeral") 907 if staticIPName != "" { 908 gceCloud, err := gce.GetGCECloud() 909 framework.ExpectNoError(err, "failed to get GCE cloud provider") 910 // Deleting it after it is attached "demotes" it to an 911 // ephemeral IP, which can be auto-released. 912 if err := gceCloud.DeleteRegionAddress(staticIPName, gceCloud.Region()); err != nil { 913 framework.Failf("failed to release static IP %s: %v", staticIPName, err) 914 } 915 staticIPName = "" 916 } 917 } 918 919 ginkgo.By("hitting the TCP service's LoadBalancer") 920 e2eservice.TestReachableHTTP(ctx, tcpIngressIP, svcPort, loadBalancerLagTimeout) 921 922 // Change the services' node ports. 923 924 ginkgo.By("adding a TCP service's NodePort") 925 tcpService, err = tcpJig.UpdateService(ctx, func(s *v1.Service) { 926 s.Spec.AllocateLoadBalancerNodePorts = utilpointer.BoolPtr(true) 927 }) 928 framework.ExpectNoError(err) 929 tcpNodePort := int(tcpService.Spec.Ports[0].NodePort) 930 if tcpNodePort == 0 { 931 framework.Failf("TCP Spec.Ports[0].NodePort (%d) not allocated", tcpNodePort) 932 } 933 if e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0]) != tcpIngressIP { 934 framework.Failf("TCP Status.LoadBalancer.Ingress changed (%s -> %s) when not expected", tcpIngressIP, e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0])) 935 } 936 framework.Logf("TCP node port: %d", tcpNodePort) 937 938 ginkgo.By("hitting the TCP service's LoadBalancer") 939 e2eservice.TestReachableHTTP(ctx, tcpIngressIP, svcPort, loadBalancerLagTimeout) 940 }) 941 942 ginkgo.It("should be able to preserve UDP traffic when server pod cycles for a LoadBalancer service on different nodes", func(ctx context.Context) { 943 // requires cloud load-balancer support 944 e2eskipper.SkipUnlessProviderIs("gce", "gke", "azure") 945 ns := f.Namespace.Name 946 nodes, err := e2enode.GetBoundedReadySchedulableNodes(ctx, cs, 2) 947 framework.ExpectNoError(err) 948 if len(nodes.Items) < 2 { 949 e2eskipper.Skipf( 950 "Test requires >= 2 Ready nodes, but there are only %v nodes", 951 len(nodes.Items)) 952 } 953 954 loadBalancerLagTimeout := e2eservice.LoadBalancerLagTimeoutDefault 955 loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 956 957 // Create a LoadBalancer service 958 udpJig := e2eservice.NewTestJig(cs, ns, serviceName) 959 ginkgo.By("creating a UDP service " + serviceName + " with type=LoadBalancer in " + ns) 960 _, err = udpJig.CreateUDPService(ctx, func(svc *v1.Service) { 961 svc.Spec.Type = v1.ServiceTypeLoadBalancer 962 svc.Spec.Ports = []v1.ServicePort{ 963 {Port: 80, Name: "udp", Protocol: v1.ProtocolUDP, TargetPort: intstr.FromInt32(80)}, 964 } 965 }) 966 framework.ExpectNoError(err) 967 968 var udpIngressIP string 969 ginkgo.By("waiting for the UDP service to have a load balancer") 970 udpService, err := udpJig.WaitForLoadBalancer(ctx, loadBalancerCreateTimeout) 971 framework.ExpectNoError(err) 972 973 udpIngressIP = e2eservice.GetIngressPoint(&udpService.Status.LoadBalancer.Ingress[0]) 974 framework.Logf("UDP load balancer: %s", udpIngressIP) 975 976 // keep hitting the loadbalancer to check it fails over to the second pod 977 ginkgo.By("hitting the UDP service's LoadBalancer with same source port") 978 stopCh := make(chan struct{}) 979 defer close(stopCh) 980 
var mu sync.Mutex 981 hostnames := sets.NewString() 982 go func() { 983 defer ginkgo.GinkgoRecover() 984 port := int(udpService.Spec.Ports[0].Port) 985 laddr, err := net.ResolveUDPAddr("udp", ":54321") 986 if err != nil { 987 framework.Failf("Failed to resolve local address: %v", err) 988 } 989 raddr := net.UDPAddr{IP: netutils.ParseIPSloppy(udpIngressIP), Port: port} 990 991 for { 992 select { 993 case <-stopCh: 994 if len(hostnames) != 2 { 995 framework.Failf("Failed to hit the 2 UDP LoadBalancer backends successfully, got %v", hostnames.List()) 996 } 997 return 998 default: 999 time.Sleep(1 * time.Second) 1000 } 1001 1002 conn, err := net.DialUDP("udp", laddr, &raddr) 1003 if err != nil { 1004 framework.Logf("Failed to connect to: %s %d", udpIngressIP, port) 1005 continue 1006 } 1007 conn.SetDeadline(time.Now().Add(3 * time.Second)) 1008 framework.Logf("Connected successfully to: %s", raddr.String()) 1009 conn.Write([]byte("hostname\n")) 1010 buff := make([]byte, 1024) 1011 n, _, err := conn.ReadFrom(buff) 1012 if err == nil { 1013 mu.Lock() 1014 hostnames.Insert(string(buff[:n])) 1015 mu.Unlock() 1016 framework.Logf("Connected successfully to hostname: %s", string(buff[:n])) 1017 } 1018 conn.Close() 1019 } 1020 }() 1021 1022 // Add a backend pod to the service in one node 1023 ginkgo.By("creating a backend pod " + podBackend1 + " for the service " + serviceName) 1024 serverPod1 := e2epod.NewAgnhostPod(ns, podBackend1, nil, nil, nil, "netexec", fmt.Sprintf("--udp-port=%d", 80)) 1025 serverPod1.Labels = udpJig.Labels 1026 serverPod1.Spec.Hostname = "hostname1" 1027 nodeSelection := e2epod.NodeSelection{Name: nodes.Items[0].Name} 1028 e2epod.SetNodeSelection(&serverPod1.Spec, nodeSelection) 1029 e2epod.NewPodClient(f).CreateSync(ctx, serverPod1) 1030 1031 validateEndpointsPortsOrFail(ctx, cs, ns, serviceName, portsByPodName{podBackend1: {80}}) 1032 1033 // Note that the fact that the Endpoints object already exists does NOT mean 1034 // that iptables (or whatever else is used) was already programmed. 1035 // Additionally, take into account that the UDP conntrack entries timeout is 1036 // 30 seconds by default. 1037 // Based on the above, check that the pod receives the traffic.
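// The background client above keeps dialing from the same UDP source port (:54321), so the load balancer and conntrack should see a single flow; the poll below just watches the shared hostnames set (guarded by mu) until the first backend's hostname shows up.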
1038 ginkgo.By("checking client pod connected to the backend 1 on Node " + nodes.Items[0].Name) 1039 if err := wait.PollImmediate(1*time.Second, loadBalancerLagTimeout, func() (bool, error) { 1040 mu.Lock() 1041 defer mu.Unlock() 1042 return hostnames.Has(serverPod1.Spec.Hostname), nil 1043 }); err != nil { 1044 framework.Failf("Failed to connect to backend 1") 1045 } 1046 1047 // Create a second pod 1048 ginkgo.By("creating a second backend pod " + podBackend2 + " for the service " + serviceName) 1049 serverPod2 := e2epod.NewAgnhostPod(ns, podBackend2, nil, nil, nil, "netexec", fmt.Sprintf("--udp-port=%d", 80)) 1050 serverPod2.Labels = udpJig.Labels 1051 serverPod2.Spec.Hostname = "hostname2" 1052 nodeSelection = e2epod.NodeSelection{Name: nodes.Items[1].Name} 1053 e2epod.SetNodeSelection(&serverPod2.Spec, nodeSelection) 1054 e2epod.NewPodClient(f).CreateSync(ctx, serverPod2) 1055 1056 // and delete the first pod 1057 framework.Logf("Cleaning up %s pod", podBackend1) 1058 e2epod.NewPodClient(f).DeleteSync(ctx, podBackend1, metav1.DeleteOptions{}, e2epod.DefaultPodDeletionTimeout) 1059 1060 validateEndpointsPortsOrFail(ctx, cs, ns, serviceName, portsByPodName{podBackend2: {80}}) 1061 1062 // Check that the second pod keeps receiving traffic 1063 // UDP conntrack entries timeout is 30 sec by default 1064 ginkgo.By("checking client pod connected to the backend 2 on Node " + nodes.Items[1].Name) 1065 if err := wait.PollImmediate(1*time.Second, loadBalancerLagTimeout, func() (bool, error) { 1066 mu.Lock() 1067 defer mu.Unlock() 1068 return hostnames.Has(serverPod2.Spec.Hostname), nil 1069 }); err != nil { 1070 framework.Failf("Failed to connect to backend 2") 1071 } 1072 }) 1073 1074 ginkgo.It("should be able to preserve UDP traffic when server pod cycles for a LoadBalancer service on the same nodes", func(ctx context.Context) { 1075 // requires cloud load-balancer support 1076 e2eskipper.SkipUnlessProviderIs("gce", "gke", "azure") 1077 ns := f.Namespace.Name 1078 nodes, err := e2enode.GetBoundedReadySchedulableNodes(ctx, cs, 1) 1079 framework.ExpectNoError(err) 1080 if len(nodes.Items) < 1 { 1081 e2eskipper.Skipf( 1082 "Test requires >= 1 Ready nodes, but there are only %d nodes", 1083 len(nodes.Items)) 1084 } 1085 1086 loadBalancerLagTimeout := e2eservice.LoadBalancerLagTimeoutDefault 1087 loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 1088 1089 // Create a LoadBalancer service 1090 udpJig := e2eservice.NewTestJig(cs, ns, serviceName) 1091 ginkgo.By("creating a UDP service " + serviceName + " with type=LoadBalancer in " + ns) 1092 _, err = udpJig.CreateUDPService(ctx, func(svc *v1.Service) { 1093 svc.Spec.Type = v1.ServiceTypeLoadBalancer 1094 svc.Spec.Ports = []v1.ServicePort{ 1095 {Port: 80, Name: "udp", Protocol: v1.ProtocolUDP, TargetPort: intstr.FromInt32(80)}, 1096 } 1097 }) 1098 framework.ExpectNoError(err) 1099 1100 var udpIngressIP string 1101 ginkgo.By("waiting for the UDP service to have a load balancer") 1102 udpService, err := udpJig.WaitForLoadBalancer(ctx, loadBalancerCreateTimeout) 1103 framework.ExpectNoError(err) 1104 1105 udpIngressIP = e2eservice.GetIngressPoint(&udpService.Status.LoadBalancer.Ingress[0]) 1106 framework.Logf("UDP load balancer: %s", udpIngressIP) 1107 1108 // keep hitting the loadbalancer to check it fails over to the second pod 1109 ginkgo.By("hitting the UDP service's LoadBalancer with same source port") 1110 stopCh := make(chan struct{}) 1111 defer close(stopCh) 1112 var mu sync.Mutex 1113 hostnames := 
sets.NewString() 1114 go func() { 1115 defer ginkgo.GinkgoRecover() 1116 port := int(udpService.Spec.Ports[0].Port) 1117 laddr, err := net.ResolveUDPAddr("udp", ":54322") 1118 if err != nil { 1119 framework.Failf("Failed to resolve local address: %v", err) 1120 } 1121 raddr := net.UDPAddr{IP: netutils.ParseIPSloppy(udpIngressIP), Port: port} 1122 1123 for { 1124 select { 1125 case <-stopCh: 1126 if len(hostnames) != 2 { 1127 framework.Failf("Failed to hit the 2 UDP LoadBalancer backends successfully, got %v", hostnames.List()) 1128 } 1129 return 1130 default: 1131 time.Sleep(1 * time.Second) 1132 } 1133 1134 conn, err := net.DialUDP("udp", laddr, &raddr) 1135 if err != nil { 1136 framework.Logf("Failed to connect to: %s %d", udpIngressIP, port) 1137 continue 1138 } 1139 conn.SetDeadline(time.Now().Add(3 * time.Second)) 1140 framework.Logf("Connected successfully to: %s", raddr.String()) 1141 conn.Write([]byte("hostname\n")) 1142 buff := make([]byte, 1024) 1143 n, _, err := conn.ReadFrom(buff) 1144 if err == nil { 1145 mu.Lock() 1146 hostnames.Insert(string(buff[:n])) 1147 mu.Unlock() 1148 framework.Logf("Connected successfully to hostname: %s", string(buff[:n])) 1149 } 1150 conn.Close() 1151 } 1152 }() 1153 1154 // Add a backend pod to the service in one node 1155 ginkgo.By("creating a backend pod " + podBackend1 + " for the service " + serviceName) 1156 serverPod1 := e2epod.NewAgnhostPod(ns, podBackend1, nil, nil, nil, "netexec", fmt.Sprintf("--udp-port=%d", 80)) 1157 serverPod1.Labels = udpJig.Labels 1158 serverPod1.Spec.Hostname = "hostname1" 1159 nodeSelection := e2epod.NodeSelection{Name: nodes.Items[0].Name} 1160 e2epod.SetNodeSelection(&serverPod1.Spec, nodeSelection) 1161 e2epod.NewPodClient(f).CreateSync(ctx, serverPod1) 1162 1163 validateEndpointsPortsOrFail(ctx, cs, ns, serviceName, portsByPodName{podBackend1: {80}}) 1164 1165 // Note that the fact that the Endpoints object already exists does NOT mean 1166 // that iptables (or whatever else is used) was already programmed. 1167 // Additionally, take into account that the UDP conntrack entries timeout is 1168 // 30 seconds by default. 1169 // Based on the above, check that the pod receives the traffic.
1170 ginkgo.By("checking client pod connected to the backend 1 on Node " + nodes.Items[0].Name) 1171 if err := wait.PollImmediate(1*time.Second, loadBalancerLagTimeout, func() (bool, error) { 1172 mu.Lock() 1173 defer mu.Unlock() 1174 return hostnames.Has(serverPod1.Spec.Hostname), nil 1175 }); err != nil { 1176 framework.Failf("Failed to connect to backend 1") 1177 } 1178 1179 // Create a second pod on the same node 1180 ginkgo.By("creating a second backend pod " + podBackend2 + " for the service " + serviceName) 1181 serverPod2 := e2epod.NewAgnhostPod(ns, podBackend2, nil, nil, nil, "netexec", fmt.Sprintf("--udp-port=%d", 80)) 1182 serverPod2.Labels = udpJig.Labels 1183 serverPod2.Spec.Hostname = "hostname2" 1184 // use the same node as previous pod 1185 e2epod.SetNodeSelection(&serverPod2.Spec, nodeSelection) 1186 e2epod.NewPodClient(f).CreateSync(ctx, serverPod2) 1187 1188 // and delete the first pod 1189 framework.Logf("Cleaning up %s pod", podBackend1) 1190 e2epod.NewPodClient(f).DeleteSync(ctx, podBackend1, metav1.DeleteOptions{}, e2epod.DefaultPodDeletionTimeout) 1191 1192 validateEndpointsPortsOrFail(ctx, cs, ns, serviceName, portsByPodName{podBackend2: {80}}) 1193 1194 // Check that the second pod keeps receiving traffic 1195 // UDP conntrack entries timeout is 30 sec by default 1196 ginkgo.By("checking client pod connected to the backend 2 on Node " + nodes.Items[0].Name) 1197 if err := wait.PollImmediate(1*time.Second, loadBalancerLagTimeout, func() (bool, error) { 1198 mu.Lock() 1199 defer mu.Unlock() 1200 return hostnames.Has(serverPod2.Spec.Hostname), nil 1201 }); err != nil { 1202 framework.Failf("Failed to connect to backend 2") 1203 } 1204 }) 1205 1206 f.It("should not have connectivity disruption during rolling update with externalTrafficPolicy=Cluster", f.WithSlow(), func(ctx context.Context) { 1207 // We start with a low but reasonable threshold to analyze the results. 1208 // The goal is to achieve 99% minimum success rate. 1209 // TODO: We should do incremental steps toward the goal. 1210 minSuccessRate := 0.95 1211 1212 testRollingUpdateLBConnectivityDisruption(ctx, f, v1.ServiceExternalTrafficPolicyTypeCluster, minSuccessRate) 1213 }) 1214 1215 f.It("should not have connectivity disruption during rolling update with externalTrafficPolicy=Local", f.WithSlow(), func(ctx context.Context) { 1216 // We start with a low but reasonable threshold to analyze the results. 1217 // The goal is to achieve 99% minimum success rate. 1218 // TODO: We should do incremental steps toward the goal. 
1219 minSuccessRate := 0.95 1220 1221 testRollingUpdateLBConnectivityDisruption(ctx, f, v1.ServiceExternalTrafficPolicyTypeLocal, minSuccessRate) 1222 }) 1223 }) 1224 1225 var _ = common.SIGDescribe("LoadBalancers ESIPP", framework.WithSlow(), func() { 1226 f := framework.NewDefaultFramework("esipp") 1227 f.NamespacePodSecurityLevel = admissionapi.LevelBaseline 1228 var loadBalancerCreateTimeout time.Duration 1229 1230 var cs clientset.Interface 1231 var subnetPrefix *net.IPNet 1232 var err error 1233 1234 ginkgo.BeforeEach(func(ctx context.Context) { 1235 // requires cloud load-balancer support - this feature is currently supported only on GCE/GKE 1236 e2eskipper.SkipUnlessProviderIs("gce", "gke") 1237 1238 cs = f.ClientSet 1239 loadBalancerCreateTimeout = e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 1240 subnetPrefix, err = getSubnetPrefix(ctx, cs) 1241 framework.ExpectNoError(err) 1242 }) 1243 1244 ginkgo.AfterEach(func(ctx context.Context) { 1245 if ginkgo.CurrentSpecReport().Failed() { 1246 DescribeSvc(f.Namespace.Name) 1247 } 1248 }) 1249 1250 ginkgo.It("should work for type=LoadBalancer", func(ctx context.Context) { 1251 namespace := f.Namespace.Name 1252 serviceName := "external-local-lb" 1253 jig := e2eservice.NewTestJig(cs, namespace, serviceName) 1254 1255 svc, err := jig.CreateOnlyLocalLoadBalancerService(ctx, loadBalancerCreateTimeout, true, nil) 1256 framework.ExpectNoError(err) 1257 healthCheckNodePort := int(svc.Spec.HealthCheckNodePort) 1258 if healthCheckNodePort == 0 { 1259 framework.Failf("Service HealthCheck NodePort was not allocated") 1260 } 1261 ginkgo.DeferCleanup(func(ctx context.Context) { 1262 err = jig.ChangeServiceType(ctx, v1.ServiceTypeClusterIP, loadBalancerCreateTimeout) 1263 framework.ExpectNoError(err) 1264 1265 // Make sure we didn't leak the health check node port.
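// Once the service is back to ClusterIP, the HealthCheck NodePort should stop answering on every endpoint node; the cleanup below probes it from a test container and expects the probe to fail (that is what the false argument appears to request) within the given threshold of attempts.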
1266 const threshold = 2 1267 nodes, err := getEndpointNodesWithInternalIP(ctx, jig) 1268 framework.ExpectNoError(err) 1269 config := e2enetwork.NewNetworkingTestConfig(ctx, f) 1270 for _, internalIP := range nodes { 1271 err := testHTTPHealthCheckNodePortFromTestContainer(ctx, 1272 config, 1273 internalIP, 1274 healthCheckNodePort, 1275 e2eservice.KubeProxyLagTimeout, 1276 false, 1277 threshold) 1278 framework.ExpectNoError(err) 1279 } 1280 err = cs.CoreV1().Services(svc.Namespace).Delete(ctx, svc.Name, metav1.DeleteOptions{}) 1281 framework.ExpectNoError(err) 1282 }) 1283 1284 svcTCPPort := int(svc.Spec.Ports[0].Port) 1285 ingressIP := e2eservice.GetIngressPoint(&svc.Status.LoadBalancer.Ingress[0]) 1286 1287 ginkgo.By("reading clientIP using the TCP service's service port via its external VIP") 1288 clientIPPort, err := GetHTTPContent(ingressIP, svcTCPPort, e2eservice.KubeProxyLagTimeout, "/clientip") 1289 framework.ExpectNoError(err) 1290 framework.Logf("ClientIP detected by target pod using VIP:SvcPort is %s", clientIPPort) 1291 1292 ginkgo.By("checking if Source IP is preserved") 1293 // The clientIPPort returned from GetHTTPContent is in this format: x.x.x.x:port or [xx:xx:xx::x]:port 1294 host, _, err := net.SplitHostPort(clientIPPort) 1295 if err != nil { 1296 framework.Failf("SplitHostPort returned unexpected error: %q", clientIPPort) 1297 } 1298 ip := netutils.ParseIPSloppy(host) 1299 if ip == nil { 1300 framework.Failf("Invalid client IP address format: %q", host) 1301 } 1302 if subnetPrefix.Contains(ip) { 1303 framework.Failf("Source IP was NOT preserved") 1304 } 1305 }) 1306 1307 ginkgo.It("should work for type=NodePort", func(ctx context.Context) { 1308 namespace := f.Namespace.Name 1309 serviceName := "external-local-nodeport" 1310 jig := e2eservice.NewTestJig(cs, namespace, serviceName) 1311 1312 svc, err := jig.CreateOnlyLocalNodePortService(ctx, true) 1313 framework.ExpectNoError(err) 1314 ginkgo.DeferCleanup(func(ctx context.Context) { 1315 err := cs.CoreV1().Services(svc.Namespace).Delete(ctx, svc.Name, metav1.DeleteOptions{}) 1316 framework.ExpectNoError(err) 1317 }) 1318 1319 tcpNodePort := int(svc.Spec.Ports[0].NodePort) 1320 1321 endpointsNodeMap, err := getEndpointNodesWithInternalIP(ctx, jig) 1322 framework.ExpectNoError(err) 1323 1324 dialCmd := "clientip" 1325 config := e2enetwork.NewNetworkingTestConfig(ctx, f) 1326 1327 for nodeName, nodeIP := range endpointsNodeMap { 1328 ginkgo.By(fmt.Sprintf("reading clientIP using the TCP service's NodePort, on node %v: %v:%v/%v", nodeName, nodeIP, tcpNodePort, dialCmd)) 1329 clientIP, err := GetHTTPContentFromTestContainer(ctx, config, nodeIP, tcpNodePort, e2eservice.KubeProxyLagTimeout, dialCmd) 1330 framework.ExpectNoError(err) 1331 framework.Logf("ClientIP detected by target pod using NodePort is %s, the ip of test container is %s", clientIP, config.TestContainerPod.Status.PodIP) 1332 // the clientIP returned by agnhost contains port 1333 if !strings.HasPrefix(clientIP, config.TestContainerPod.Status.PodIP) { 1334 framework.Failf("Source IP was NOT preserved") 1335 } 1336 } 1337 }) 1338 1339 ginkgo.It("should only target nodes with endpoints", func(ctx context.Context) { 1340 namespace := f.Namespace.Name 1341 serviceName := "external-local-nodes" 1342 jig := e2eservice.NewTestJig(cs, namespace, serviceName) 1343 nodes, err := e2enode.GetBoundedReadySchedulableNodes(ctx, cs, e2eservice.MaxNodesForEndpointsTests) 1344 framework.ExpectNoError(err) 1345 1346 svc, err := jig.CreateOnlyLocalLoadBalancerService(ctx, 
loadBalancerCreateTimeout, false, 1347 func(svc *v1.Service) { 1348 // Change service port to avoid collision with opened hostPorts 1349 // in other tests that run in parallel. 1350 if len(svc.Spec.Ports) != 0 { 1351 svc.Spec.Ports[0].TargetPort = intstr.FromInt32(svc.Spec.Ports[0].Port) 1352 svc.Spec.Ports[0].Port = 8081 1353 } 1354 1355 }) 1356 framework.ExpectNoError(err) 1357 ginkgo.DeferCleanup(func(ctx context.Context) { 1358 err = jig.ChangeServiceType(ctx, v1.ServiceTypeClusterIP, loadBalancerCreateTimeout) 1359 framework.ExpectNoError(err) 1360 err := cs.CoreV1().Services(svc.Namespace).Delete(ctx, svc.Name, metav1.DeleteOptions{}) 1361 framework.ExpectNoError(err) 1362 }) 1363 1364 healthCheckNodePort := int(svc.Spec.HealthCheckNodePort) 1365 if healthCheckNodePort == 0 { 1366 framework.Failf("Service HealthCheck NodePort was not allocated") 1367 } 1368 1369 ips := e2enode.CollectAddresses(nodes, v1.NodeInternalIP) 1370 1371 ingressIP := e2eservice.GetIngressPoint(&svc.Status.LoadBalancer.Ingress[0]) 1372 svcTCPPort := int(svc.Spec.Ports[0].Port) 1373 1374 const threshold = 2 1375 config := e2enetwork.NewNetworkingTestConfig(ctx, f) 1376 for i := 0; i < len(nodes.Items); i++ { 1377 endpointNodeName := nodes.Items[i].Name 1378 1379 ginkgo.By("creating a pod to be part of the service " + serviceName + " on node " + endpointNodeName) 1380 _, err = jig.Run(ctx, func(rc *v1.ReplicationController) { 1381 rc.Name = serviceName 1382 if endpointNodeName != "" { 1383 rc.Spec.Template.Spec.NodeName = endpointNodeName 1384 } 1385 }) 1386 framework.ExpectNoError(err) 1387 1388 ginkgo.By(fmt.Sprintf("waiting for service endpoint on node %v", endpointNodeName)) 1389 err = jig.WaitForEndpointOnNode(ctx, endpointNodeName) 1390 framework.ExpectNoError(err) 1391 1392 // HealthCheck should pass only on the node where num(endpoints) > 0 1393 // All other nodes should fail the healthcheck on the service healthCheckNodePort 1394 for n, internalIP := range ips { 1395 // Make sure the loadbalancer picked up the health check change. 1396 // Confirm traffic can reach backend through LB before checking healthcheck nodeport. 
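			// This loop assumes ips[n] (from e2enode.CollectAddresses) corresponds to nodes.Items[n],
			// so internalIP can be mapped back to the node name when deciding expectedSuccess.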
1397 e2eservice.TestReachableHTTP(ctx, ingressIP, svcTCPPort, e2eservice.KubeProxyLagTimeout) 1398 expectedSuccess := nodes.Items[n].Name == endpointNodeName 1399 port := strconv.Itoa(healthCheckNodePort) 1400 ipPort := net.JoinHostPort(internalIP, port) 1401 framework.Logf("Health checking %s, http://%s/healthz, expectedSuccess %v", nodes.Items[n].Name, ipPort, expectedSuccess) 1402 err := testHTTPHealthCheckNodePortFromTestContainer(ctx, 1403 config, 1404 internalIP, 1405 healthCheckNodePort, 1406 e2eservice.KubeProxyEndpointLagTimeout, 1407 expectedSuccess, 1408 threshold) 1409 framework.ExpectNoError(err) 1410 } 1411 framework.ExpectNoError(e2erc.DeleteRCAndWaitForGC(ctx, f.ClientSet, namespace, serviceName)) 1412 } 1413 }) 1414 1415 ginkgo.It("should work from pods", func(ctx context.Context) { 1416 var err error 1417 namespace := f.Namespace.Name 1418 serviceName := "external-local-pods" 1419 jig := e2eservice.NewTestJig(cs, namespace, serviceName) 1420 1421 svc, err := jig.CreateOnlyLocalLoadBalancerService(ctx, loadBalancerCreateTimeout, true, nil) 1422 framework.ExpectNoError(err) 1423 ginkgo.DeferCleanup(func(ctx context.Context) { 1424 err = jig.ChangeServiceType(ctx, v1.ServiceTypeClusterIP, loadBalancerCreateTimeout) 1425 framework.ExpectNoError(err) 1426 err := cs.CoreV1().Services(svc.Namespace).Delete(ctx, svc.Name, metav1.DeleteOptions{}) 1427 framework.ExpectNoError(err) 1428 }) 1429 1430 ingressIP := e2eservice.GetIngressPoint(&svc.Status.LoadBalancer.Ingress[0]) 1431 port := strconv.Itoa(int(svc.Spec.Ports[0].Port)) 1432 ipPort := net.JoinHostPort(ingressIP, port) 1433 path := fmt.Sprintf("%s/clientip", ipPort) 1434 1435 ginkgo.By("Creating pause pod deployment to make sure, pausePods are in desired state") 1436 deployment := createPausePodDeployment(ctx, cs, "pause-pod-deployment", namespace, 1) 1437 framework.ExpectNoError(e2edeployment.WaitForDeploymentComplete(cs, deployment), "Failed to complete pause pod deployment") 1438 1439 ginkgo.DeferCleanup(func(ctx context.Context) { 1440 framework.Logf("Deleting deployment") 1441 err = cs.AppsV1().Deployments(namespace).Delete(ctx, deployment.Name, metav1.DeleteOptions{}) 1442 framework.ExpectNoError(err, "Failed to delete deployment %s", deployment.Name) 1443 }) 1444 1445 deployment, err = cs.AppsV1().Deployments(namespace).Get(ctx, deployment.Name, metav1.GetOptions{}) 1446 framework.ExpectNoError(err, "Error in retrieving pause pod deployment") 1447 labelSelector, err := metav1.LabelSelectorAsSelector(deployment.Spec.Selector) 1448 framework.ExpectNoError(err, "Error in setting LabelSelector as selector from deployment") 1449 1450 pausePods, err := cs.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector.String()}) 1451 framework.ExpectNoError(err, "Error in listing pods associated with pause pod deployments") 1452 1453 pausePod := pausePods.Items[0] 1454 framework.Logf("Waiting up to %v curl %v", e2eservice.KubeProxyLagTimeout, path) 1455 cmd := fmt.Sprintf(`curl -q -s --connect-timeout 30 %v`, path) 1456 1457 var srcIP string 1458 loadBalancerPropagationTimeout := e2eservice.GetServiceLoadBalancerPropagationTimeout(ctx, cs) 1459 ginkgo.By(fmt.Sprintf("Hitting external lb %v from pod %v on node %v", ingressIP, pausePod.Name, pausePod.Spec.NodeName)) 1460 if pollErr := wait.PollImmediate(framework.Poll, loadBalancerPropagationTimeout, func() (bool, error) { 1461 stdout, err := e2eoutput.RunHostCmd(pausePod.Namespace, pausePod.Name, cmd) 1462 if err != nil { 1463 framework.Logf("got err: %v, 
retry until timeout", err)
1464 				return false, nil
1465 			}
1466 			srcIP = strings.TrimSpace(strings.Split(stdout, ":")[0])
1467 			return srcIP == pausePod.Status.PodIP, nil
1468 		}); pollErr != nil {
1469 			framework.Failf("Source IP not preserved from %v, expected '%v' got '%v'", pausePod.Name, pausePod.Status.PodIP, srcIP)
1470 		}
1471 	})
1472 
1473 	ginkgo.It("should handle updates to ExternalTrafficPolicy field", func(ctx context.Context) {
1474 		namespace := f.Namespace.Name
1475 		serviceName := "external-local-update"
1476 		jig := e2eservice.NewTestJig(cs, namespace, serviceName)
1477 
1478 		nodes, err := e2enode.GetBoundedReadySchedulableNodes(ctx, cs, e2eservice.MaxNodesForEndpointsTests)
1479 		framework.ExpectNoError(err)
1480 		if len(nodes.Items) < 2 {
1481 			framework.Failf("Need at least 2 nodes to verify source ip from a node without endpoint")
1482 		}
1483 
1484 		svc, err := jig.CreateOnlyLocalLoadBalancerService(ctx, loadBalancerCreateTimeout, true, nil)
1485 		framework.ExpectNoError(err)
1486 		ginkgo.DeferCleanup(func(ctx context.Context) {
1487 			err = jig.ChangeServiceType(ctx, v1.ServiceTypeClusterIP, loadBalancerCreateTimeout)
1488 			framework.ExpectNoError(err)
1489 			err := cs.CoreV1().Services(svc.Namespace).Delete(ctx, svc.Name, metav1.DeleteOptions{})
1490 			framework.ExpectNoError(err)
1491 		})
1492 
1493 		// save the health check node port because it disappears when ESIPP is turned off.
1494 		healthCheckNodePort := int(svc.Spec.HealthCheckNodePort)
1495 
1496 		ginkgo.By("turning ESIPP off")
1497 		svc, err = jig.UpdateService(ctx, func(svc *v1.Service) {
1498 			svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyCluster
1499 		})
1500 		framework.ExpectNoError(err)
1501 		if svc.Spec.HealthCheckNodePort > 0 {
1502 			framework.Failf("Service HealthCheck NodePort still present")
1503 		}
1504 
1505 		epNodes, err := jig.ListNodesWithEndpoint(ctx)
1506 		framework.ExpectNoError(err)
1507 		// map from name of nodes with endpoint to internal ip
1508 		// it is assumed that there is only a single node with the endpoint
1509 		endpointNodeMap := make(map[string]string)
1510 		// map from name of nodes without endpoint to internal ip
1511 		noEndpointNodeMap := make(map[string]string)
1512 		for _, node := range epNodes {
1513 			ips := e2enode.GetAddresses(&node, v1.NodeInternalIP)
1514 			if len(ips) < 1 {
1515 				framework.Failf("No internal ip found for node %s", node.Name)
1516 			}
1517 			endpointNodeMap[node.Name] = ips[0]
1518 		}
1519 		for _, n := range nodes.Items {
1520 			ips := e2enode.GetAddresses(&n, v1.NodeInternalIP)
1521 			if len(ips) < 1 {
1522 				framework.Failf("No internal ip found for node %s", n.Name)
1523 			}
1524 			if _, ok := endpointNodeMap[n.Name]; !ok {
1525 				noEndpointNodeMap[n.Name] = ips[0]
1526 			}
1527 		}
1528 		gomega.Expect(endpointNodeMap).ToNot(gomega.BeEmpty())
1529 		gomega.Expect(noEndpointNodeMap).ToNot(gomega.BeEmpty())
1530 
1531 		svcTCPPort := int(svc.Spec.Ports[0].Port)
1532 		svcNodePort := int(svc.Spec.Ports[0].NodePort)
1533 		ingressIP := e2eservice.GetIngressPoint(&svc.Status.LoadBalancer.Ingress[0])
1534 		path := "/clientip"
1535 		dialCmd := "clientip"
1536 
1537 		config := e2enetwork.NewNetworkingTestConfig(ctx, f)
1538 
1539 		ginkgo.By(fmt.Sprintf("endpoints present on nodes %v, absent on nodes %v", endpointNodeMap, noEndpointNodeMap))
1540 		for nodeName, nodeIP := range noEndpointNodeMap {
1541 			ginkgo.By(fmt.Sprintf("Checking %v (%v:%v/%v) proxies to endpoints on another node", nodeName, nodeIP, svcNodePort, dialCmd))
1542 			_, err := GetHTTPContentFromTestContainer(ctx, config, nodeIP, svcNodePort,
e2eservice.KubeProxyLagTimeout, dialCmd) 1543 framework.ExpectNoError(err, "Could not reach HTTP service through %v:%v/%v after %v", nodeIP, svcNodePort, dialCmd, e2eservice.KubeProxyLagTimeout) 1544 } 1545 1546 for nodeName, nodeIP := range endpointNodeMap { 1547 ginkgo.By(fmt.Sprintf("checking kube-proxy health check fails on node with endpoint (%s), public IP %s", nodeName, nodeIP)) 1548 var body string 1549 pollFn := func() (bool, error) { 1550 // we expect connection failure here, but not other errors 1551 resp, err := config.GetResponseFromTestContainer(ctx, 1552 "http", 1553 "healthz", 1554 nodeIP, 1555 healthCheckNodePort) 1556 if err != nil { 1557 return false, nil 1558 } 1559 if len(resp.Errors) > 0 { 1560 return true, nil 1561 } 1562 if len(resp.Responses) > 0 { 1563 body = resp.Responses[0] 1564 } 1565 return false, nil 1566 } 1567 if pollErr := wait.PollImmediate(framework.Poll, e2eservice.TestTimeout, pollFn); pollErr != nil { 1568 framework.Failf("Kube-proxy still exposing health check on node %v:%v, after ESIPP was turned off. body %s", 1569 nodeName, healthCheckNodePort, body) 1570 } 1571 } 1572 1573 // Poll till kube-proxy re-adds the MASQUERADE rule on the node. 1574 ginkgo.By(fmt.Sprintf("checking source ip is NOT preserved through loadbalancer %v", ingressIP)) 1575 var clientIP string 1576 pollErr := wait.PollImmediate(framework.Poll, 3*e2eservice.KubeProxyLagTimeout, func() (bool, error) { 1577 clientIPPort, err := GetHTTPContent(ingressIP, svcTCPPort, e2eservice.KubeProxyLagTimeout, path) 1578 if err != nil { 1579 return false, nil 1580 } 1581 // The clientIPPort returned from GetHTTPContent is in this format: x.x.x.x:port or [xx:xx:xx::x]:port 1582 host, _, err := net.SplitHostPort(clientIPPort) 1583 if err != nil { 1584 framework.Logf("SplitHostPort returned unexpected error: %q", clientIPPort) 1585 return false, nil 1586 } 1587 ip := netutils.ParseIPSloppy(host) 1588 if ip == nil { 1589 framework.Logf("Invalid client IP address format: %q", host) 1590 return false, nil 1591 } 1592 if subnetPrefix.Contains(ip) { 1593 return true, nil 1594 } 1595 return false, nil 1596 }) 1597 if pollErr != nil { 1598 framework.Failf("Source IP WAS preserved even after ESIPP turned off. Got %v, expected a ten-dot cluster ip.", clientIP) 1599 } 1600 1601 // TODO: We need to attempt to create another service with the previously 1602 // allocated healthcheck nodePort. If the health check nodePort has been 1603 // freed, the new service creation will succeed, upon which we cleanup. 1604 // If the health check nodePort has NOT been freed, the new service 1605 // creation will fail. 
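		// At this point the Service still runs with externalTrafficPolicy=Cluster, so backends see
		// the masqueraded node IP instead of the original client IP; the next step switches the
		// policy back to Local and re-requests the saved health check node port.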
1606 1607 ginkgo.By("setting ExternalTraffic field back to OnlyLocal") 1608 svc, err = jig.UpdateService(ctx, func(svc *v1.Service) { 1609 svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyLocal 1610 // Request the same healthCheckNodePort as before, to test the user-requested allocation path 1611 svc.Spec.HealthCheckNodePort = int32(healthCheckNodePort) 1612 }) 1613 framework.ExpectNoError(err) 1614 loadBalancerPropagationTimeout := e2eservice.GetServiceLoadBalancerPropagationTimeout(ctx, cs) 1615 pollErr = wait.PollImmediate(framework.PollShortTimeout, loadBalancerPropagationTimeout, func() (bool, error) { 1616 clientIPPort, err := GetHTTPContent(ingressIP, svcTCPPort, e2eservice.KubeProxyLagTimeout, path) 1617 if err != nil { 1618 return false, nil 1619 } 1620 ginkgo.By(fmt.Sprintf("Endpoint %v:%v%v returned client ip %v", ingressIP, svcTCPPort, path, clientIPPort)) 1621 // The clientIPPort returned from GetHTTPContent is in this format: x.x.x.x:port or [xx:xx:xx::x]:port 1622 host, _, err := net.SplitHostPort(clientIPPort) 1623 if err != nil { 1624 framework.Logf("SplitHostPort returned unexpected error: %q", clientIPPort) 1625 return false, nil 1626 } 1627 ip := netutils.ParseIPSloppy(host) 1628 if ip == nil { 1629 framework.Logf("Invalid client IP address format: %q", host) 1630 return false, nil 1631 } 1632 if !subnetPrefix.Contains(ip) { 1633 return true, nil 1634 } 1635 return false, nil 1636 }) 1637 if pollErr != nil { 1638 framework.Failf("Source IP (%v) is not the client IP even after ESIPP turned on, expected a public IP.", clientIP) 1639 } 1640 }) 1641 }) 1642 1643 func testRollingUpdateLBConnectivityDisruption(ctx context.Context, f *framework.Framework, externalTrafficPolicy v1.ServiceExternalTrafficPolicyType, minSuccessRate float64) { 1644 cs := f.ClientSet 1645 ns := f.Namespace.Name 1646 name := "test-lb-rolling-update" 1647 labels := map[string]string{"name": name} 1648 gracePeriod := int64(60) 1649 maxUnavailable := intstr.FromString("10%") 1650 ds := e2edaemonset.NewDaemonSet(name, e2eapps.AgnhostImage, labels, nil, nil, 1651 []v1.ContainerPort{ 1652 {ContainerPort: 80}, 1653 }, 1654 "netexec", "--http-port=80", fmt.Sprintf("--delay-shutdown=%d", gracePeriod), 1655 ) 1656 ds.Spec.UpdateStrategy = appsv1.DaemonSetUpdateStrategy{ 1657 Type: appsv1.RollingUpdateDaemonSetStrategyType, 1658 RollingUpdate: &appsv1.RollingUpdateDaemonSet{ 1659 MaxUnavailable: &maxUnavailable, 1660 }, 1661 } 1662 ds.Spec.Template.Labels = labels 1663 ds.Spec.Template.Spec.TerminationGracePeriodSeconds = utilpointer.Int64(gracePeriod) 1664 1665 nodeNames := e2edaemonset.SchedulableNodes(ctx, cs, ds) 1666 e2eskipper.SkipUnlessAtLeast(len(nodeNames), 2, "load-balancer rolling update test requires at least 2 schedulable nodes for the DaemonSet") 1667 if len(nodeNames) > 25 { 1668 e2eskipper.Skipf("load-balancer rolling update test skipped for large environments with more than 25 nodes") 1669 } 1670 1671 ginkgo.By(fmt.Sprintf("Creating DaemonSet %q", name)) 1672 ds, err := cs.AppsV1().DaemonSets(ns).Create(context.TODO(), ds, metav1.CreateOptions{}) 1673 framework.ExpectNoError(err) 1674 1675 ginkgo.By("Checking that daemon pods launch on every schedulable node of the cluster") 1676 creationTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 1677 err = wait.PollUntilContextTimeout(ctx, framework.Poll, creationTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, nodeNames)) 1678 framework.ExpectNoError(err, "error waiting for daemon pods to start") 1679 err = 
e2edaemonset.CheckDaemonStatus(ctx, f, name) 1680 framework.ExpectNoError(err) 1681 1682 ginkgo.By(fmt.Sprintf("Creating a service %s with type=LoadBalancer externalTrafficPolicy=%s in namespace %s", name, externalTrafficPolicy, ns)) 1683 jig := e2eservice.NewTestJig(cs, ns, name) 1684 jig.Labels = labels 1685 service, err := jig.CreateLoadBalancerService(ctx, creationTimeout, func(svc *v1.Service) { 1686 svc.Spec.ExternalTrafficPolicy = externalTrafficPolicy 1687 }) 1688 framework.ExpectNoError(err) 1689 1690 lbNameOrAddress := e2eservice.GetIngressPoint(&service.Status.LoadBalancer.Ingress[0]) 1691 svcPort := int(service.Spec.Ports[0].Port) 1692 1693 ginkgo.By("Hitting the DaemonSet's pods through the service's load balancer") 1694 timeout := e2eservice.LoadBalancerLagTimeoutDefault 1695 if framework.ProviderIs("aws") { 1696 timeout = e2eservice.LoadBalancerLagTimeoutAWS 1697 } 1698 e2eservice.TestReachableHTTP(ctx, lbNameOrAddress, svcPort, timeout) 1699 1700 ginkgo.By("Starting a goroutine to continuously hit the DaemonSet's pods through the service's load balancer") 1701 var totalRequests uint64 = 0 1702 var networkErrors uint64 = 0 1703 var httpErrors uint64 = 0 1704 done := make(chan struct{}) 1705 defer close(done) 1706 go func() { 1707 defer ginkgo.GinkgoRecover() 1708 1709 wait.Until(func() { 1710 atomic.AddUint64(&totalRequests, 1) 1711 client := &http.Client{ 1712 Transport: utilnet.SetTransportDefaults(&http.Transport{ 1713 DisableKeepAlives: true, 1714 }), 1715 Timeout: 5 * time.Second, 1716 } 1717 ipPort := net.JoinHostPort(lbNameOrAddress, strconv.Itoa(svcPort)) 1718 msg := "hello" 1719 url := fmt.Sprintf("http://%s/echo?msg=%s", ipPort, msg) 1720 resp, err := client.Get(url) 1721 if err != nil { 1722 framework.Logf("Got error testing for reachability of %s: %v", url, err) 1723 atomic.AddUint64(&networkErrors, 1) 1724 return 1725 } 1726 defer resp.Body.Close() 1727 if resp.StatusCode != http.StatusOK { 1728 framework.Logf("Got bad status code: %d", resp.StatusCode) 1729 atomic.AddUint64(&httpErrors, 1) 1730 return 1731 } 1732 body, err := io.ReadAll(resp.Body) 1733 if err != nil { 1734 framework.Logf("Got error reading HTTP body: %v", err) 1735 atomic.AddUint64(&httpErrors, 1) 1736 return 1737 } 1738 if string(body) != msg { 1739 framework.Logf("The response body does not contain expected string %s", string(body)) 1740 atomic.AddUint64(&httpErrors, 1) 1741 return 1742 } 1743 }, time.Duration(0), done) 1744 }() 1745 1746 ginkgo.By("Triggering DaemonSet rolling update several times") 1747 var previousTotalRequests uint64 = 0 1748 var previousNetworkErrors uint64 = 0 1749 var previousHttpErrors uint64 = 0 1750 for i := 1; i <= 5; i++ { 1751 framework.Logf("Update daemon pods environment: [{\"name\":\"VERSION\",\"value\":\"%d\"}]", i) 1752 patch := fmt.Sprintf(`{"spec":{"template":{"spec":{"containers":[{"name":"%s","env":[{"name":"VERSION","value":"%d"}]}]}}}}`, ds.Spec.Template.Spec.Containers[0].Name, i) 1753 ds, err = cs.AppsV1().DaemonSets(ns).Patch(context.TODO(), name, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{}) 1754 framework.ExpectNoError(err) 1755 1756 framework.Logf("Check that daemon pods are available on every node of the cluster with the updated environment.") 1757 err = wait.PollImmediate(framework.Poll, creationTimeout, func() (bool, error) { 1758 podList, err := cs.CoreV1().Pods(ds.Namespace).List(context.TODO(), metav1.ListOptions{}) 1759 if err != nil { 1760 return false, err 1761 } 1762 pods := podList.Items 1763 1764 readyPods := 0 
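			// Count only pods that are owned by this DaemonSet, are not terminating, run the
			// updated VERSION value, and have been available for at least MinReadySeconds.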
1765 for _, pod := range pods { 1766 if !metav1.IsControlledBy(&pod, ds) { 1767 continue 1768 } 1769 if pod.DeletionTimestamp != nil { 1770 continue 1771 } 1772 podVersion := "" 1773 for _, env := range pod.Spec.Containers[0].Env { 1774 if env.Name == "VERSION" { 1775 podVersion = env.Value 1776 break 1777 } 1778 } 1779 if podVersion != fmt.Sprintf("%d", i) { 1780 continue 1781 } 1782 podReady := podutil.IsPodAvailable(&pod, ds.Spec.MinReadySeconds, metav1.Now()) 1783 if !podReady { 1784 continue 1785 } 1786 readyPods += 1 1787 } 1788 framework.Logf("Number of running nodes: %d, number of updated ready pods: %d in daemonset %s", len(nodeNames), readyPods, ds.Name) 1789 return readyPods == len(nodeNames), nil 1790 }) 1791 framework.ExpectNoError(err, "error waiting for daemon pods to be ready") 1792 1793 // assert that the HTTP requests success rate is above the acceptable threshold after this rolling update 1794 currentTotalRequests := atomic.LoadUint64(&totalRequests) 1795 currentNetworkErrors := atomic.LoadUint64(&networkErrors) 1796 currentHttpErrors := atomic.LoadUint64(&httpErrors) 1797 1798 partialTotalRequests := currentTotalRequests - previousTotalRequests 1799 partialNetworkErrors := currentNetworkErrors - previousNetworkErrors 1800 partialHttpErrors := currentHttpErrors - previousHttpErrors 1801 partialSuccessRate := (float64(partialTotalRequests) - float64(partialNetworkErrors+partialHttpErrors)) / float64(partialTotalRequests) 1802 1803 framework.Logf("Load Balancer total HTTP requests: %d", partialTotalRequests) 1804 framework.Logf("Network errors: %d", partialNetworkErrors) 1805 framework.Logf("HTTP errors: %d", partialHttpErrors) 1806 framework.Logf("Success rate: %.2f%%", partialSuccessRate*100) 1807 if partialSuccessRate < minSuccessRate { 1808 framework.Failf("Encountered too many errors when doing HTTP requests to the load balancer address. Success rate is %.2f%%, and the minimum allowed threshold is %.2f%%.", partialSuccessRate*100, minSuccessRate*100) 1809 } 1810 1811 previousTotalRequests = currentTotalRequests 1812 previousNetworkErrors = currentNetworkErrors 1813 previousHttpErrors = currentHttpErrors 1814 } 1815 1816 // assert that the load balancer address is still reachable after the rolling updates are finished 1817 e2eservice.TestReachableHTTP(ctx, lbNameOrAddress, svcPort, timeout) 1818 }