k8s.io/kubernetes@v1.29.3/test/e2e/network/loadbalancer.go (about) 1 /* 2 Copyright 2016 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package network 18 19 import ( 20 "context" 21 "fmt" 22 "io" 23 "math/big" 24 "net" 25 "net/http" 26 "strconv" 27 "strings" 28 "sync" 29 "sync/atomic" 30 "time" 31 32 compute "google.golang.org/api/compute/v1" 33 34 appsv1 "k8s.io/api/apps/v1" 35 v1 "k8s.io/api/core/v1" 36 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 37 "k8s.io/apimachinery/pkg/types" 38 "k8s.io/apimachinery/pkg/util/intstr" 39 utilnet "k8s.io/apimachinery/pkg/util/net" 40 "k8s.io/apimachinery/pkg/util/sets" 41 "k8s.io/apimachinery/pkg/util/wait" 42 clientset "k8s.io/client-go/kubernetes" 43 podutil "k8s.io/kubernetes/pkg/api/v1/pod" 44 e2eapps "k8s.io/kubernetes/test/e2e/apps" 45 "k8s.io/kubernetes/test/e2e/framework" 46 e2edaemonset "k8s.io/kubernetes/test/e2e/framework/daemonset" 47 e2edeployment "k8s.io/kubernetes/test/e2e/framework/deployment" 48 e2enetwork "k8s.io/kubernetes/test/e2e/framework/network" 49 e2enode "k8s.io/kubernetes/test/e2e/framework/node" 50 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 51 e2eoutput "k8s.io/kubernetes/test/e2e/framework/pod/output" 52 "k8s.io/kubernetes/test/e2e/framework/providers/gce" 53 e2erc "k8s.io/kubernetes/test/e2e/framework/rc" 54 e2eservice "k8s.io/kubernetes/test/e2e/framework/service" 55 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" 56 "k8s.io/kubernetes/test/e2e/network/common" 57 admissionapi "k8s.io/pod-security-admission/api" 58 netutils "k8s.io/utils/net" 59 utilpointer "k8s.io/utils/pointer" 60 61 "github.com/onsi/ginkgo/v2" 62 "github.com/onsi/gomega" 63 ) 64 65 // getInternalIP returns node internal IP 66 func getInternalIP(node *v1.Node) (string, error) { 67 for _, address := range node.Status.Addresses { 68 if address.Type == v1.NodeInternalIP && address.Address != "" { 69 return address.Address, nil 70 } 71 } 72 return "", fmt.Errorf("couldn't get the internal IP of host %s with addresses %v", node.Name, node.Status.Addresses) 73 } 74 75 // getSubnetPrefix returns a network prefix based on one of the workers' 76 // InternalIPs, adding a /16 or /64 mask depending on the IP family of the node. 77 // IMPORTANT: This assumes a flat network assigned to the nodes, which is common 78 // on cloud providers.
79 func getSubnetPrefix(ctx context.Context, c clientset.Interface) (*net.IPNet, error) { 80 node, err := getReadySchedulableWorkerNode(ctx, c) 81 if err != nil { 82 return nil, fmt.Errorf("error getting a ready schedulable worker Node, err: %w", err) 83 } 84 internalIP, err := getInternalIP(node) 85 if err != nil { 86 return nil, fmt.Errorf("error getting Node internal IP, err: %w", err) 87 } 88 ip := netutils.ParseIPSloppy(internalIP) 89 if ip == nil { 90 return nil, fmt.Errorf("invalid IP address format: %s", internalIP) 91 } 92 93 // if IPv6 return a net.IPNet with IP = ip and mask /64 94 cidrMask := net.CIDRMask(64, 128) 95 // if IPv4 return a net.IPNet with IP = ip and mask /16 96 if netutils.IsIPv4(ip) { 97 cidrMask = net.CIDRMask(16, 32) 98 } 99 return &net.IPNet{IP: ip.Mask(cidrMask), Mask: cidrMask}, nil 100 } 101 102 // getReadySchedulableWorkerNode gets a single worker node that is available for 103 // running pods. If there are no such available nodes, it returns an error. 104 func getReadySchedulableWorkerNode(ctx context.Context, c clientset.Interface) (*v1.Node, error) { 105 nodes, err := e2enode.GetReadySchedulableNodes(ctx, c) 106 if err != nil { 107 return nil, err 108 } 109 for i := range nodes.Items { 110 node := nodes.Items[i] 111 _, isMaster := node.Labels["node-role.kubernetes.io/master"] 112 _, isControlPlane := node.Labels["node-role.kubernetes.io/control-plane"] 113 if !isMaster && !isControlPlane { 114 return &node, nil 115 } 116 } 117 return nil, fmt.Errorf("there are currently no ready, schedulable worker nodes in the cluster") 118 } 119 120 var _ = common.SIGDescribe("LoadBalancers", func() { 121 f := framework.NewDefaultFramework("loadbalancers") 122 f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged 123 124 var cs clientset.Interface 125 var subnetPrefix *net.IPNet 126 var err error 127 128 ginkgo.BeforeEach(func(ctx context.Context) { 129 cs = f.ClientSet 130 subnetPrefix, err = getSubnetPrefix(ctx, cs) 131 framework.ExpectNoError(err) 132 }) 133 134 ginkgo.AfterEach(func(ctx context.Context) { 135 if ginkgo.CurrentSpecReport().Failed() { 136 DescribeSvc(f.Namespace.Name) 137 } 138 }) 139 140 f.It("should be able to change the type and ports of a TCP service", f.WithSlow(), func(ctx context.Context) { 141 // requires cloud load-balancer support 142 e2eskipper.SkipUnlessProviderIs("gce", "gke", "aws") 143 144 loadBalancerLagTimeout := e2eservice.LoadBalancerLagTimeoutDefault 145 if framework.ProviderIs("aws") { 146 loadBalancerLagTimeout = e2eservice.LoadBalancerLagTimeoutAWS 147 } 148 loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 149 150 // This test is more monolithic than we'd like because LB turnup can be 151 // very slow, so we lumped all the tests into one LB lifecycle.
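// Rough outline of the lifecycle exercised below: start as ClusterIP, switch to NodePort, then to LoadBalancer (optionally with a reserved static IP on GCE/GKE), mutate the NodePort and the service port, scale the backends to 0 and back to 1, and finally return to ClusterIP and verify the cloud LB is torn down.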
152 153 serviceName := "mutability-test" 154 ns1 := f.Namespace.Name // LB1 in ns1 on TCP 155 framework.Logf("namespace for TCP test: %s", ns1) 156 157 ginkgo.By("creating a TCP service " + serviceName + " with type=ClusterIP in namespace " + ns1) 158 tcpJig := e2eservice.NewTestJig(cs, ns1, serviceName) 159 tcpService, err := tcpJig.CreateTCPService(ctx, nil) 160 framework.ExpectNoError(err) 161 162 svcPort := int(tcpService.Spec.Ports[0].Port) 163 framework.Logf("service port TCP: %d", svcPort) 164 165 ginkgo.By("creating a pod to be part of the TCP service " + serviceName) 166 _, err = tcpJig.Run(ctx, nil) 167 framework.ExpectNoError(err) 168 169 execPod := e2epod.CreateExecPodOrFail(ctx, cs, ns1, "execpod", nil) 170 err = tcpJig.CheckServiceReachability(ctx, tcpService, execPod) 171 framework.ExpectNoError(err) 172 173 // Change the services to NodePort. 174 175 ginkgo.By("changing the TCP service to type=NodePort") 176 tcpService, err = tcpJig.UpdateService(ctx, func(s *v1.Service) { 177 s.Spec.Type = v1.ServiceTypeNodePort 178 }) 179 framework.ExpectNoError(err) 180 tcpNodePort := int(tcpService.Spec.Ports[0].NodePort) 181 framework.Logf("TCP node port: %d", tcpNodePort) 182 183 err = tcpJig.CheckServiceReachability(ctx, tcpService, execPod) 184 framework.ExpectNoError(err) 185 186 // Change the services to LoadBalancer. 187 188 // Here we test that LoadBalancers can receive static IP addresses. This isn't 189 // necessary, but is an additional feature this monolithic test checks. 190 requestedIP := "" 191 staticIPName := "" 192 if framework.ProviderIs("gce", "gke") { 193 ginkgo.By("creating a static load balancer IP") 194 staticIPName = fmt.Sprintf("e2e-external-lb-test-%s", framework.RunID) 195 gceCloud, err := gce.GetGCECloud() 196 framework.ExpectNoError(err, "failed to get GCE cloud provider") 197 198 err = gceCloud.ReserveRegionAddress(&compute.Address{Name: staticIPName}, gceCloud.Region()) 199 defer func() { 200 if staticIPName != "" { 201 // Release GCE static IP - this is not kube-managed and will not be automatically released. 
202 if err := gceCloud.DeleteRegionAddress(staticIPName, gceCloud.Region()); err != nil { 203 framework.Logf("failed to release static IP %s: %v", staticIPName, err) 204 } 205 } 206 }() 207 framework.ExpectNoError(err, "failed to create region address: %s", staticIPName) 208 reservedAddr, err := gceCloud.GetRegionAddress(staticIPName, gceCloud.Region()) 209 framework.ExpectNoError(err, "failed to get region address: %s", staticIPName) 210 211 requestedIP = reservedAddr.Address 212 framework.Logf("Allocated static load balancer IP: %s", requestedIP) 213 } 214 215 ginkgo.By("changing the TCP service to type=LoadBalancer") 216 _, err = tcpJig.UpdateService(ctx, func(s *v1.Service) { 217 s.Spec.LoadBalancerIP = requestedIP // will be "" if not applicable 218 s.Spec.Type = v1.ServiceTypeLoadBalancer 219 }) 220 framework.ExpectNoError(err) 221 222 ginkgo.By("waiting for the TCP service to have a load balancer") 223 // Wait for the load balancer to be created asynchronously 224 tcpService, err = tcpJig.WaitForLoadBalancer(ctx, loadBalancerCreateTimeout) 225 framework.ExpectNoError(err) 226 if int(tcpService.Spec.Ports[0].NodePort) != tcpNodePort { 227 framework.Failf("TCP Spec.Ports[0].NodePort changed (%d -> %d) when not expected", tcpNodePort, tcpService.Spec.Ports[0].NodePort) 228 } 229 if requestedIP != "" && e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0]) != requestedIP { 230 framework.Failf("unexpected TCP Status.LoadBalancer.Ingress (expected %s, got %s)", requestedIP, e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0])) 231 } 232 tcpIngressIP := e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0]) 233 framework.Logf("TCP load balancer: %s", tcpIngressIP) 234 235 if framework.ProviderIs("gce", "gke") { 236 // Do this as early as possible, which overrides the `defer` above. 237 // This is mostly out of fear of leaking the IP in a timeout case 238 // (as of this writing we're not 100% sure where the leaks are 239 // coming from, so this is first-aid rather than surgery). 240 ginkgo.By("demoting the static IP to ephemeral") 241 if staticIPName != "" { 242 gceCloud, err := gce.GetGCECloud() 243 framework.ExpectNoError(err, "failed to get GCE cloud provider") 244 // Deleting it after it is attached "demotes" it to an 245 // ephemeral IP, which can be auto-released. 246 if err := gceCloud.DeleteRegionAddress(staticIPName, gceCloud.Region()); err != nil { 247 framework.Failf("failed to release static IP %s: %v", staticIPName, err) 248 } 249 staticIPName = "" 250 } 251 } 252 253 err = tcpJig.CheckServiceReachability(ctx, tcpService, execPod) 254 framework.ExpectNoError(err) 255 256 ginkgo.By("hitting the TCP service's LoadBalancer") 257 e2eservice.TestReachableHTTP(ctx, tcpIngressIP, svcPort, loadBalancerLagTimeout) 258 259 // Change the services' node ports. 
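// Only the NodePort is changed here; the LoadBalancer ingress IP is expected to stay the same, which the assertions below verify before re-testing reachability through the LB.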
260 261 ginkgo.By("changing the TCP service's NodePort") 262 tcpService, err = tcpJig.ChangeServiceNodePort(ctx, tcpNodePort) 263 framework.ExpectNoError(err) 264 tcpNodePortOld := tcpNodePort 265 tcpNodePort = int(tcpService.Spec.Ports[0].NodePort) 266 if tcpNodePort == tcpNodePortOld { 267 framework.Failf("TCP Spec.Ports[0].NodePort (%d) did not change", tcpNodePort) 268 } 269 if e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0]) != tcpIngressIP { 270 framework.Failf("TCP Status.LoadBalancer.Ingress changed (%s -> %s) when not expected", tcpIngressIP, e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0])) 271 } 272 framework.Logf("TCP node port: %d", tcpNodePort) 273 274 ginkgo.By("hitting the TCP service's LoadBalancer") 275 e2eservice.TestReachableHTTP(ctx, tcpIngressIP, svcPort, loadBalancerLagTimeout) 276 277 // Change the services' main ports. 278 279 ginkgo.By("changing the TCP service's port") 280 tcpService, err = tcpJig.UpdateService(ctx, func(s *v1.Service) { 281 s.Spec.Ports[0].Port++ 282 }) 283 framework.ExpectNoError(err) 284 svcPortOld := svcPort 285 svcPort = int(tcpService.Spec.Ports[0].Port) 286 if svcPort == svcPortOld { 287 framework.Failf("TCP Spec.Ports[0].Port (%d) did not change", svcPort) 288 } 289 if int(tcpService.Spec.Ports[0].NodePort) != tcpNodePort { 290 framework.Failf("TCP Spec.Ports[0].NodePort (%d) changed", tcpService.Spec.Ports[0].NodePort) 291 } 292 if e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0]) != tcpIngressIP { 293 framework.Failf("TCP Status.LoadBalancer.Ingress changed (%s -> %s) when not expected", tcpIngressIP, e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0])) 294 } 295 296 framework.Logf("service port TCP: %d", svcPort) 297 298 ginkgo.By("hitting the TCP service's LoadBalancer") 299 e2eservice.TestReachableHTTP(ctx, tcpIngressIP, svcPort, loadBalancerCreateTimeout) 300 301 ginkgo.By("Scaling the pods to 0") 302 err = tcpJig.Scale(ctx, 0) 303 framework.ExpectNoError(err) 304 305 ginkgo.By("looking for ICMP REJECT on the TCP service's LoadBalancer") 306 testRejectedHTTP(tcpIngressIP, svcPort, loadBalancerCreateTimeout) 307 308 ginkgo.By("Scaling the pods to 1") 309 err = tcpJig.Scale(ctx, 1) 310 framework.ExpectNoError(err) 311 312 ginkgo.By("hitting the TCP service's LoadBalancer") 313 e2eservice.TestReachableHTTP(ctx, tcpIngressIP, svcPort, loadBalancerCreateTimeout) 314 315 // Change the services back to ClusterIP. 
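// Going back to ClusterIP should clear the allocated NodePort and deprovision the cloud load balancer; the code below checks the cleared NodePort, waits for the LB to be destroyed, and then confirms the old ingress IP no longer answers.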
316 317 ginkgo.By("changing TCP service back to type=ClusterIP") 318 tcpReadback, err := tcpJig.UpdateService(ctx, func(s *v1.Service) { 319 s.Spec.Type = v1.ServiceTypeClusterIP 320 }) 321 framework.ExpectNoError(err) 322 if tcpReadback.Spec.Ports[0].NodePort != 0 { 323 framework.Fail("TCP Spec.Ports[0].NodePort was not cleared") 324 } 325 // Wait for the load balancer to be destroyed asynchronously 326 _, err = tcpJig.WaitForLoadBalancerDestroy(ctx, tcpIngressIP, svcPort, loadBalancerCreateTimeout) 327 framework.ExpectNoError(err) 328 329 ginkgo.By("checking the TCP LoadBalancer is closed") 330 testNotReachableHTTP(tcpIngressIP, svcPort, loadBalancerLagTimeout) 331 }) 332 333 f.It("should be able to change the type and ports of a UDP service", f.WithSlow(), func(ctx context.Context) { 334 // requires cloud load-balancer support 335 e2eskipper.SkipUnlessProviderIs("gce", "gke") 336 337 loadBalancerLagTimeout := e2eservice.LoadBalancerLagTimeoutDefault 338 loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 339 340 // This test is more monolithic than we'd like because LB turnup can be 341 // very slow, so we lumped all the tests into one LB lifecycle. 342 343 serviceName := "mutability-test" 344 ns2 := f.Namespace.Name // LB1 in ns2 on UDP 345 framework.Logf("namespace for UDP test: %s", ns2) 346 347 ginkgo.By("creating a UDP service " + serviceName + " with type=ClusterIP in namespace " + ns2) 348 udpJig := e2eservice.NewTestJig(cs, ns2, serviceName) 349 udpService, err := udpJig.CreateUDPService(ctx, nil) 350 framework.ExpectNoError(err) 351 352 svcPort := int(udpService.Spec.Ports[0].Port) 353 framework.Logf("service port UDP: %d", svcPort) 354 355 ginkgo.By("creating a pod to be part of the UDP service " + serviceName) 356 _, err = udpJig.Run(ctx, nil) 357 framework.ExpectNoError(err) 358 359 execPod := e2epod.CreateExecPodOrFail(ctx, cs, ns2, "execpod", nil) 360 err = udpJig.CheckServiceReachability(ctx, udpService, execPod) 361 framework.ExpectNoError(err) 362 363 // Change the services to NodePort. 364 365 ginkgo.By("changing the UDP service to type=NodePort") 366 udpService, err = udpJig.UpdateService(ctx, func(s *v1.Service) { 367 s.Spec.Type = v1.ServiceTypeNodePort 368 }) 369 framework.ExpectNoError(err) 370 udpNodePort := int(udpService.Spec.Ports[0].NodePort) 371 framework.Logf("UDP node port: %d", udpNodePort) 372 373 err = udpJig.CheckServiceReachability(ctx, udpService, execPod) 374 framework.ExpectNoError(err) 375 376 // Change the services to LoadBalancer. 377 378 // Here we test that LoadBalancers can receive static IP addresses. This isn't 379 // necessary, but is an additional feature this monolithic test checks. 380 requestedIP := "" 381 staticIPName := "" 382 ginkgo.By("creating a static load balancer IP") 383 staticIPName = fmt.Sprintf("e2e-external-lb-test-%s", framework.RunID) 384 gceCloud, err := gce.GetGCECloud() 385 framework.ExpectNoError(err, "failed to get GCE cloud provider") 386 387 err = gceCloud.ReserveRegionAddress(&compute.Address{Name: staticIPName}, gceCloud.Region()) 388 defer func() { 389 if staticIPName != "" { 390 // Release GCE static IP - this is not kube-managed and will not be automatically released.
391 if err := gceCloud.DeleteRegionAddress(staticIPName, gceCloud.Region()); err != nil { 392 framework.Logf("failed to release static IP %s: %v", staticIPName, err) 393 } 394 } 395 }() 396 framework.ExpectNoError(err, "failed to create region address: %s", staticIPName) 397 reservedAddr, err := gceCloud.GetRegionAddress(staticIPName, gceCloud.Region()) 398 framework.ExpectNoError(err, "failed to get region address: %s", staticIPName) 399 400 requestedIP = reservedAddr.Address 401 framework.Logf("Allocated static load balancer IP: %s", requestedIP) 402 403 ginkgo.By("changing the UDP service to type=LoadBalancer") 404 _, err = udpJig.UpdateService(ctx, func(s *v1.Service) { 405 s.Spec.Type = v1.ServiceTypeLoadBalancer 406 }) 407 framework.ExpectNoError(err) 408 409 // Do this as early as possible, which overrides the `defer` above. 410 // This is mostly out of fear of leaking the IP in a timeout case 411 // (as of this writing we're not 100% sure where the leaks are 412 // coming from, so this is first-aid rather than surgery). 413 ginkgo.By("demoting the static IP to ephemeral") 414 if staticIPName != "" { 415 gceCloud, err := gce.GetGCECloud() 416 framework.ExpectNoError(err, "failed to get GCE cloud provider") 417 // Deleting it after it is attached "demotes" it to an 418 // ephemeral IP, which can be auto-released. 419 if err := gceCloud.DeleteRegionAddress(staticIPName, gceCloud.Region()); err != nil { 420 framework.Failf("failed to release static IP %s: %v", staticIPName, err) 421 } 422 staticIPName = "" 423 } 424 425 var udpIngressIP string 426 ginkgo.By("waiting for the UDP service to have a load balancer") 427 // 2nd one should be faster since they ran in parallel. 428 udpService, err = udpJig.WaitForLoadBalancer(ctx, loadBalancerCreateTimeout) 429 framework.ExpectNoError(err) 430 if int(udpService.Spec.Ports[0].NodePort) != udpNodePort { 431 framework.Failf("UDP Spec.Ports[0].NodePort changed (%d -> %d) when not expected", udpNodePort, udpService.Spec.Ports[0].NodePort) 432 } 433 udpIngressIP = e2eservice.GetIngressPoint(&udpService.Status.LoadBalancer.Ingress[0]) 434 framework.Logf("UDP load balancer: %s", udpIngressIP) 435 436 err = udpJig.CheckServiceReachability(ctx, udpService, execPod) 437 framework.ExpectNoError(err) 438 439 ginkgo.By("hitting the UDP service's LoadBalancer") 440 testReachableUDP(udpIngressIP, svcPort, loadBalancerLagTimeout) 441 442 // Change the services' node ports. 443 444 ginkgo.By("changing the UDP service's NodePort") 445 udpService, err = udpJig.ChangeServiceNodePort(ctx, udpNodePort) 446 framework.ExpectNoError(err) 447 udpNodePortOld := udpNodePort 448 udpNodePort = int(udpService.Spec.Ports[0].NodePort) 449 if udpNodePort == udpNodePortOld { 450 framework.Failf("UDP Spec.Ports[0].NodePort (%d) did not change", udpNodePort) 451 } 452 if e2eservice.GetIngressPoint(&udpService.Status.LoadBalancer.Ingress[0]) != udpIngressIP { 453 framework.Failf("UDP Status.LoadBalancer.Ingress changed (%s -> %s) when not expected", udpIngressIP, e2eservice.GetIngressPoint(&udpService.Status.LoadBalancer.Ingress[0])) 454 } 455 framework.Logf("UDP node port: %d", udpNodePort) 456 457 err = udpJig.CheckServiceReachability(ctx, udpService, execPod) 458 framework.ExpectNoError(err) 459 460 ginkgo.By("hitting the UDP service's LoadBalancer") 461 testReachableUDP(udpIngressIP, svcPort, loadBalancerLagTimeout) 462 463 // Change the services' main ports. 
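// Only Spec.Ports[0].Port is bumped below; the NodePort and the LB ingress IP are expected to remain unchanged, and the service should become reachable on the new port both via the NodePort and via the LB.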
464 465 ginkgo.By("changing the UDP service's port") 466 udpService, err = udpJig.UpdateService(ctx, func(s *v1.Service) { 467 s.Spec.Ports[0].Port++ 468 }) 469 framework.ExpectNoError(err) 470 svcPortOld := svcPort 471 svcPort = int(udpService.Spec.Ports[0].Port) 472 if svcPort == svcPortOld { 473 framework.Failf("UDP Spec.Ports[0].Port (%d) did not change", svcPort) 474 } 475 if int(udpService.Spec.Ports[0].NodePort) != udpNodePort { 476 framework.Failf("UDP Spec.Ports[0].NodePort (%d) changed", udpService.Spec.Ports[0].NodePort) 477 } 478 if e2eservice.GetIngressPoint(&udpService.Status.LoadBalancer.Ingress[0]) != udpIngressIP { 479 framework.Failf("UDP Status.LoadBalancer.Ingress changed (%s -> %s) when not expected", udpIngressIP, e2eservice.GetIngressPoint(&udpService.Status.LoadBalancer.Ingress[0])) 480 } 481 482 framework.Logf("service port UDP: %d", svcPort) 483 484 ginkgo.By("hitting the UDP service's NodePort") 485 err = udpJig.CheckServiceReachability(ctx, udpService, execPod) 486 framework.ExpectNoError(err) 487 488 ginkgo.By("hitting the UDP service's LoadBalancer") 489 testReachableUDP(udpIngressIP, svcPort, loadBalancerCreateTimeout) 490 491 ginkgo.By("Scaling the pods to 0") 492 err = udpJig.Scale(ctx, 0) 493 framework.ExpectNoError(err) 494 495 ginkgo.By("looking for ICMP REJECT on the UDP service's LoadBalancer") 496 testRejectedUDP(udpIngressIP, svcPort, loadBalancerCreateTimeout) 497 498 ginkgo.By("Scaling the pods to 1") 499 err = udpJig.Scale(ctx, 1) 500 framework.ExpectNoError(err) 501 502 ginkgo.By("hitting the UDP service's NodePort") 503 err = udpJig.CheckServiceReachability(ctx, udpService, execPod) 504 framework.ExpectNoError(err) 505 506 ginkgo.By("hitting the UDP service's LoadBalancer") 507 testReachableUDP(udpIngressIP, svcPort, loadBalancerCreateTimeout) 508 509 // Change the services back to ClusterIP. 
510 511 ginkgo.By("changing UDP service back to type=ClusterIP") 512 udpReadback, err := udpJig.UpdateService(ctx, func(s *v1.Service) { 513 s.Spec.Type = v1.ServiceTypeClusterIP 514 }) 515 framework.ExpectNoError(err) 516 if udpReadback.Spec.Ports[0].NodePort != 0 { 517 framework.Fail("UDP Spec.Ports[0].NodePort was not cleared") 518 } 519 // Wait for the load balancer to be destroyed asynchronously 520 _, err = udpJig.WaitForLoadBalancerDestroy(ctx, udpIngressIP, svcPort, loadBalancerCreateTimeout) 521 framework.ExpectNoError(err) 522 523 ginkgo.By("checking the UDP LoadBalancer is closed") 524 testNotReachableUDP(udpIngressIP, svcPort, loadBalancerLagTimeout) 525 }) 526 527 f.It("should only allow access from service loadbalancer source ranges", f.WithSlow(), func(ctx context.Context) { 528 // this feature is currently supported only on GCE/GKE/AWS/AZURE 529 e2eskipper.SkipUnlessProviderIs("gce", "gke", "aws", "azure") 530 531 loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 532 533 namespace := f.Namespace.Name 534 serviceName := "lb-sourcerange" 535 jig := e2eservice.NewTestJig(cs, namespace, serviceName) 536 537 ginkgo.By("Prepare allow source ips") 538 // prepare the exec pods 539 // acceptPod is allowed to access the load balancer 540 acceptPod := e2epod.CreateExecPodOrFail(ctx, cs, namespace, "execpod-accept", nil) 541 dropPod := e2epod.CreateExecPodOrFail(ctx, cs, namespace, "execpod-drop", nil) 542 543 ginkgo.By("creating a pod to be part of the service " + serviceName) 544 // This container is an nginx container listening on port 80 545 // See kubernetes/contrib/ingress/echoheaders/nginx.conf for content of response 546 _, err := jig.Run(ctx, nil) 547 framework.ExpectNoError(err) 548 // Make sure acceptPod is running. There is a chance the pod was terminated for unexpected reasons. 549 acceptPod, err = cs.CoreV1().Pods(namespace).Get(ctx, acceptPod.Name, metav1.GetOptions{}) 550 framework.ExpectNoError(err, "Unable to get pod %s", acceptPod.Name) 551 gomega.Expect(acceptPod.Status.Phase).To(gomega.Equal(v1.PodRunning)) 552 gomega.Expect(acceptPod.Status.PodIP).ToNot(gomega.BeEmpty()) 553 554 // Create a LoadBalancer service whose source range allows only acceptPod 555 svc, err := jig.CreateTCPService(ctx, func(svc *v1.Service) { 556 svc.Spec.Type = v1.ServiceTypeLoadBalancer 557 svc.Spec.LoadBalancerSourceRanges = []string{acceptPod.Status.PodIP + "/32"} 558 }) 559 framework.ExpectNoError(err) 560 561 ginkgo.DeferCleanup(func(ctx context.Context) { 562 ginkgo.By("Clean up loadbalancer service") 563 e2eservice.WaitForServiceDeletedWithFinalizer(ctx, cs, svc.Namespace, svc.Name) 564 }) 565 566 svc, err = jig.WaitForLoadBalancer(ctx, loadBalancerCreateTimeout) 567 framework.ExpectNoError(err) 568 569 ginkgo.By("check reachability from different sources") 570 svcIP := e2eservice.GetIngressPoint(&svc.Status.LoadBalancer.Ingress[0]) 571 // We should wait until service changes are actually propagated in the cloud-provider, 572 // as this may take a significant amount of time, especially in large clusters. 573 // However, there is no way to tell whether they have already been programmed. 574 // So we resolve this by using loadBalancerCreateTimeout, which takes cluster size into account. 575 checkReachabilityFromPod(true, loadBalancerCreateTimeout, namespace, acceptPod.Name, svcIP) 576 checkReachabilityFromPod(false, loadBalancerCreateTimeout, namespace, dropPod.Name, svcIP) 577 578 // Make sure dropPod is running. There is a chance the pod was terminated for unexpected reasons. 579 dropPod, err = cs.CoreV1().Pods(namespace).Get(ctx, dropPod.Name, metav1.GetOptions{}) 580 framework.ExpectNoError(err, "Unable to get pod %s", dropPod.Name) 581 gomega.Expect(dropPod.Status.Phase).To(gomega.Equal(v1.PodRunning)) 582 gomega.Expect(dropPod.Status.PodIP).ToNot(gomega.BeEmpty()) 583 584 ginkgo.By("Update service LoadBalancerSourceRange and check reachability") 585 _, err = jig.UpdateService(ctx, func(svc *v1.Service) { 586 // only allow access from dropPod 587 svc.Spec.LoadBalancerSourceRanges = []string{dropPod.Status.PodIP + "/32"} 588 }) 589 framework.ExpectNoError(err) 590 591 // We should wait until service changes are actually propagated, as this may take 592 // a significant amount of time, especially in large clusters. 593 // However, there is no way to tell whether they have already been programmed. 594 // So we resolve this by using loadBalancerCreateTimeout, which takes cluster size into account. 595 checkReachabilityFromPod(false, loadBalancerCreateTimeout, namespace, acceptPod.Name, svcIP) 596 checkReachabilityFromPod(true, loadBalancerCreateTimeout, namespace, dropPod.Name, svcIP) 597 598 ginkgo.By("Delete LoadBalancerSourceRange field and check reachability") 599 _, err = jig.UpdateService(ctx, func(svc *v1.Service) { 600 svc.Spec.LoadBalancerSourceRanges = nil 601 }) 602 framework.ExpectNoError(err) 603 // We should wait until service changes are actually propagated, as this may take 604 // a significant amount of time, especially in large clusters. 605 // However, there is no way to tell whether they have already been programmed. 606 // So we resolve this by using loadBalancerCreateTimeout, which takes cluster size into account. 607 checkReachabilityFromPod(true, loadBalancerCreateTimeout, namespace, acceptPod.Name, svcIP) 608 checkReachabilityFromPod(true, loadBalancerCreateTimeout, namespace, dropPod.Name, svcIP) 609 }) 610 611 f.It("should be able to create an internal type load balancer", f.WithSlow(), func(ctx context.Context) { 612 e2eskipper.SkipUnlessProviderIs("azure", "gke", "gce") 613 614 createTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 615 pollInterval := framework.Poll * 10 616 617 namespace := f.Namespace.Name 618 serviceName := "lb-internal" 619 jig := e2eservice.NewTestJig(cs, namespace, serviceName) 620 621 ginkgo.By("creating pod to be part of service " + serviceName) 622 _, err := jig.Run(ctx, nil) 623 framework.ExpectNoError(err) 624 625 enableILB, disableILB := enableAndDisableInternalLB() 626 627 isInternalEndpoint := func(lbIngress *v1.LoadBalancerIngress) bool { 628 ingressEndpoint := e2eservice.GetIngressPoint(lbIngress) 629 ingressIP := netutils.ParseIPSloppy(ingressEndpoint) 630 if ingressIP == nil { 631 framework.Failf("invalid ingressEndpoint IP address format: %s", ingressEndpoint) 632 } 633 // Needs update for providers using hostname as endpoint.
634 return subnetPrefix.Contains(ingressIP) 635 } 636 637 ginkgo.By("creating a service with type LoadBalancer and cloud specific Internal-LB annotation enabled") 638 svc, err := jig.CreateTCPService(ctx, func(svc *v1.Service) { 639 svc.Spec.Type = v1.ServiceTypeLoadBalancer 640 enableILB(svc) 641 }) 642 framework.ExpectNoError(err) 643 644 ginkgo.DeferCleanup(func(ctx context.Context) { 645 ginkgo.By("Clean up loadbalancer service") 646 e2eservice.WaitForServiceDeletedWithFinalizer(ctx, cs, svc.Namespace, svc.Name) 647 }) 648 649 svc, err = jig.WaitForLoadBalancer(ctx, createTimeout) 650 framework.ExpectNoError(err) 651 lbIngress := &svc.Status.LoadBalancer.Ingress[0] 652 svcPort := int(svc.Spec.Ports[0].Port) 653 // should have an internal IP. 654 if !isInternalEndpoint(lbIngress) { 655 framework.Failf("lbIngress %v doesn't have an internal IP", lbIngress) 656 } 657 658 // ILBs are not accessible from the test orchestrator, so it's necessary to use 659 // a pod to test the service. 660 ginkgo.By("hitting the internal load balancer from pod") 661 framework.Logf("creating pod with host network") 662 hostExec := launchHostExecPod(ctx, f.ClientSet, f.Namespace.Name, "ilb-host-exec") 663 664 framework.Logf("Waiting up to %v for service %q's internal LB to respond to requests", createTimeout, serviceName) 665 tcpIngressIP := e2eservice.GetIngressPoint(lbIngress) 666 if pollErr := wait.PollImmediate(pollInterval, createTimeout, func() (bool, error) { 667 cmd := fmt.Sprintf(`curl -m 5 'http://%v:%v/echo?msg=hello'`, tcpIngressIP, svcPort) 668 stdout, err := e2eoutput.RunHostCmd(hostExec.Namespace, hostExec.Name, cmd) 669 if err != nil { 670 framework.Logf("error curling; stdout: %v. err: %v", stdout, err) 671 return false, nil 672 } 673 674 if !strings.Contains(stdout, "hello") { 675 framework.Logf("Expected output to contain 'hello', got %q; retrying...", stdout) 676 return false, nil 677 } 678 679 framework.Logf("Successful curl; stdout: %v", stdout) 680 return true, nil 681 }); pollErr != nil { 682 framework.Failf("Failed to hit ILB IP, err: %v", pollErr) 683 } 684 685 ginkgo.By("switching to external type LoadBalancer") 686 svc, err = jig.UpdateService(ctx, func(svc *v1.Service) { 687 disableILB(svc) 688 }) 689 framework.ExpectNoError(err) 690 framework.Logf("Waiting up to %v for service %q to have an external LoadBalancer", createTimeout, serviceName) 691 if pollErr := wait.PollImmediate(pollInterval, createTimeout, func() (bool, error) { 692 svc, err := cs.CoreV1().Services(namespace).Get(ctx, serviceName, metav1.GetOptions{}) 693 if err != nil { 694 return false, err 695 } 696 lbIngress = &svc.Status.LoadBalancer.Ingress[0] 697 return !isInternalEndpoint(lbIngress), nil 698 }); pollErr != nil { 699 framework.Failf("Loadbalancer IP not changed to external.") 700 } 701 // should have an external IP. 702 gomega.Expect(isInternalEndpoint(lbIngress)).To(gomega.BeFalse()) 703 704 ginkgo.By("hitting the external load balancer") 705 framework.Logf("Waiting up to %v for service %q's external LB to respond to requests", createTimeout, serviceName) 706 tcpIngressIP = e2eservice.GetIngressPoint(lbIngress) 707 e2eservice.TestReachableHTTP(ctx, tcpIngressIP, svcPort, e2eservice.LoadBalancerLagTimeoutDefault) 708 709 // GCE cannot test a specific IP because the test may not own it. This cloud specific condition 710 // will be removed when GCP supports similar functionality.
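// On Azure, the block below derives a static internal IP that should fall inside the node subnet by adding a fixed offset (0.0.11.11) to the subnet base address, then requests it via Spec.LoadBalancerIP when switching back to an internal LB.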
711 if framework.ProviderIs("azure") { 712 ginkgo.By("switching back to internal type LoadBalancer, with static IP specified.") 713 // For a cluster created with CAPZ, node-subnet may not be "10.240.0.0/16", e.g. "10.1.0.0/16". 714 base := netutils.BigForIP(subnetPrefix.IP) 715 offset := big.NewInt(0).SetBytes(netutils.ParseIPSloppy("0.0.11.11").To4()).Int64() 716 717 internalStaticIP := netutils.AddIPOffset(base, int(offset)).String() 718 719 svc, err = jig.UpdateService(ctx, func(svc *v1.Service) { 720 svc.Spec.LoadBalancerIP = internalStaticIP 721 enableILB(svc) 722 }) 723 framework.ExpectNoError(err) 724 framework.Logf("Waiting up to %v for service %q to have an internal LoadBalancer", createTimeout, serviceName) 725 if pollErr := wait.PollImmediate(pollInterval, createTimeout, func() (bool, error) { 726 svc, err := cs.CoreV1().Services(namespace).Get(ctx, serviceName, metav1.GetOptions{}) 727 if err != nil { 728 return false, err 729 } 730 lbIngress = &svc.Status.LoadBalancer.Ingress[0] 731 return isInternalEndpoint(lbIngress), nil 732 }); pollErr != nil { 733 framework.Failf("Loadbalancer IP not changed to internal.") 734 } 735 // should have the given static internal IP. 736 gomega.Expect(e2eservice.GetIngressPoint(lbIngress)).To(gomega.Equal(internalStaticIP)) 737 } 738 }) 739 740 // [LinuxOnly]: Windows does not support session affinity. 741 f.It("should have session affinity work for LoadBalancer service with ESIPP on", f.WithSlow(), "[LinuxOnly]", func(ctx context.Context) { 742 // L4 load balancer affinity `ClientIP` is not supported on AWS ELB. 743 e2eskipper.SkipIfProviderIs("aws") 744 745 svc := getServeHostnameService("affinity-lb-esipp") 746 svc.Spec.Type = v1.ServiceTypeLoadBalancer 747 svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyLocal 748 execAffinityTestForLBService(ctx, f, cs, svc) 749 }) 750 751 // [LinuxOnly]: Windows does not support session affinity. 752 f.It("should be able to switch session affinity for LoadBalancer service with ESIPP on", f.WithSlow(), "[LinuxOnly]", func(ctx context.Context) { 753 // L4 load balancer affinity `ClientIP` is not supported on AWS ELB. 754 e2eskipper.SkipIfProviderIs("aws") 755 756 svc := getServeHostnameService("affinity-lb-esipp-transition") 757 svc.Spec.Type = v1.ServiceTypeLoadBalancer 758 svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyLocal 759 execAffinityTestForLBServiceWithTransition(ctx, f, cs, svc) 760 }) 761 762 // [LinuxOnly]: Windows does not support session affinity. 763 f.It("should have session affinity work for LoadBalancer service with ESIPP off", f.WithSlow(), "[LinuxOnly]", func(ctx context.Context) { 764 // L4 load balancer affinity `ClientIP` is not supported on AWS ELB. 765 e2eskipper.SkipIfProviderIs("aws") 766 767 svc := getServeHostnameService("affinity-lb") 768 svc.Spec.Type = v1.ServiceTypeLoadBalancer 769 svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyCluster 770 execAffinityTestForLBService(ctx, f, cs, svc) 771 }) 772 773 // [LinuxOnly]: Windows does not support session affinity. 774 f.It("should be able to switch session affinity for LoadBalancer service with ESIPP off", f.WithSlow(), "[LinuxOnly]", func(ctx context.Context) { 775 // L4 load balancer affinity `ClientIP` is not supported on AWS ELB.
776 e2eskipper.SkipIfProviderIs("aws") 777 778 svc := getServeHostnameService("affinity-lb-transition") 779 svc.Spec.Type = v1.ServiceTypeLoadBalancer 780 svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyCluster 781 execAffinityTestForLBServiceWithTransition(ctx, f, cs, svc) 782 }) 783 784 // This test verifies that the service load balancer cleanup finalizer is properly 785 // handled during the service lifecycle. 786 // 1. Create service with type=LoadBalancer. Finalizer should be added. 787 // 2. Update service to type=ClusterIP. Finalizer should be removed. 788 // 3. Update service to type=LoadBalancer. Finalizer should be added. 789 // 4. Delete service with type=LoadBalancer. Finalizer should be removed. 790 f.It("should handle load balancer cleanup finalizer for service", f.WithSlow(), func(ctx context.Context) { 791 jig := e2eservice.NewTestJig(cs, f.Namespace.Name, "lb-finalizer") 792 793 ginkgo.By("Create load balancer service") 794 svc, err := jig.CreateTCPService(ctx, func(svc *v1.Service) { 795 svc.Spec.Type = v1.ServiceTypeLoadBalancer 796 }) 797 framework.ExpectNoError(err) 798 799 ginkgo.DeferCleanup(func(ctx context.Context) { 800 ginkgo.By("Check that service can be deleted with finalizer") 801 e2eservice.WaitForServiceDeletedWithFinalizer(ctx, cs, svc.Namespace, svc.Name) 802 }) 803 804 ginkgo.By("Wait for load balancer to serve traffic") 805 svc, err = jig.WaitForLoadBalancer(ctx, e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs)) 806 framework.ExpectNoError(err) 807 808 ginkgo.By("Check if finalizer is present on service with type=LoadBalancer") 809 e2eservice.WaitForServiceUpdatedWithFinalizer(ctx, cs, svc.Namespace, svc.Name, true) 810 811 ginkgo.By("Check if finalizer is removed on service after it is changed to type=ClusterIP") 812 err = jig.ChangeServiceType(ctx, v1.ServiceTypeClusterIP, e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs)) 813 framework.ExpectNoError(err) 814 e2eservice.WaitForServiceUpdatedWithFinalizer(ctx, cs, svc.Namespace, svc.Name, false) 815 816 ginkgo.By("Check if finalizer is added back to service after it is changed to type=LoadBalancer") 817 err = jig.ChangeServiceType(ctx, v1.ServiceTypeLoadBalancer, e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs)) 818 framework.ExpectNoError(err) 819 e2eservice.WaitForServiceUpdatedWithFinalizer(ctx, cs, svc.Namespace, svc.Name, true) 820 }) 821 822 f.It("should be able to create LoadBalancer Service without NodePort and change it", f.WithSlow(), func(ctx context.Context) { 823 // requires cloud load-balancer support 824 e2eskipper.SkipUnlessProviderIs("gce", "gke", "aws") 825 826 loadBalancerLagTimeout := e2eservice.LoadBalancerLagTimeoutDefault 827 if framework.ProviderIs("aws") { 828 loadBalancerLagTimeout = e2eservice.LoadBalancerLagTimeoutAWS 829 } 830 loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 831 832 // This test is more monolithic than we'd like because LB turnup can be 833 // very slow, so we lumped all the tests into one LB lifecycle.
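// The twist in this variant is Spec.AllocateLoadBalancerNodePorts=false: the LoadBalancer should come up with no NodePort allocated, and flipping the field back to true later should allocate one without changing the ingress IP.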
834 835 serviceName := "reallocate-nodeport-test" 836 ns1 := f.Namespace.Name // LB1 in ns1 on TCP 837 framework.Logf("namespace for TCP test: %s", ns1) 838 839 ginkgo.By("creating a TCP service " + serviceName + " with type=ClusterIP in namespace " + ns1) 840 tcpJig := e2eservice.NewTestJig(cs, ns1, serviceName) 841 tcpService, err := tcpJig.CreateTCPService(ctx, nil) 842 framework.ExpectNoError(err) 843 844 svcPort := int(tcpService.Spec.Ports[0].Port) 845 framework.Logf("service port TCP: %d", svcPort) 846 847 ginkgo.By("creating a pod to be part of the TCP service " + serviceName) 848 _, err = tcpJig.Run(ctx, nil) 849 framework.ExpectNoError(err) 850 851 // Change the services to LoadBalancer. 852 853 // Here we test that LoadBalancers can receive static IP addresses. This isn't 854 // necessary, but is an additional feature this monolithic test checks. 855 requestedIP := "" 856 staticIPName := "" 857 if framework.ProviderIs("gce", "gke") { 858 ginkgo.By("creating a static load balancer IP") 859 staticIPName = fmt.Sprintf("e2e-external-lb-test-%s", framework.RunID) 860 gceCloud, err := gce.GetGCECloud() 861 framework.ExpectNoError(err, "failed to get GCE cloud provider") 862 863 err = gceCloud.ReserveRegionAddress(&compute.Address{Name: staticIPName}, gceCloud.Region()) 864 ginkgo.DeferCleanup(func(ctx context.Context) { 865 if staticIPName != "" { 866 // Release GCE static IP - this is not kube-managed and will not be automatically released. 867 if err := gceCloud.DeleteRegionAddress(staticIPName, gceCloud.Region()); err != nil { 868 framework.Logf("failed to release static IP %s: %v", staticIPName, err) 869 } 870 } 871 }) 872 framework.ExpectNoError(err, "failed to create region address: %s", staticIPName) 873 reservedAddr, err := gceCloud.GetRegionAddress(staticIPName, gceCloud.Region()) 874 framework.ExpectNoError(err, "failed to get region address: %s", staticIPName) 875 876 requestedIP = reservedAddr.Address 877 framework.Logf("Allocated static load balancer IP: %s", requestedIP) 878 } 879 880 ginkgo.By("changing the TCP service to type=LoadBalancer") 881 _, err = tcpJig.UpdateService(ctx, func(s *v1.Service) { 882 s.Spec.LoadBalancerIP = requestedIP // will be "" if not applicable 883 s.Spec.Type = v1.ServiceTypeLoadBalancer 884 s.Spec.AllocateLoadBalancerNodePorts = utilpointer.BoolPtr(false) 885 }) 886 framework.ExpectNoError(err) 887 888 ginkgo.By("waiting for the TCP service to have a load balancer") 889 // Wait for the load balancer to be created asynchronously 890 tcpService, err = tcpJig.WaitForLoadBalancer(ctx, loadBalancerCreateTimeout) 891 framework.ExpectNoError(err) 892 if int(tcpService.Spec.Ports[0].NodePort) != 0 { 893 framework.Failf("TCP Spec.Ports[0].NodePort allocated %d when not expected", tcpService.Spec.Ports[0].NodePort) 894 } 895 if requestedIP != "" && e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0]) != requestedIP { 896 framework.Failf("unexpected TCP Status.LoadBalancer.Ingress (expected %s, got %s)", requestedIP, e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0])) 897 } 898 tcpIngressIP := e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0]) 899 framework.Logf("TCP load balancer: %s", tcpIngressIP) 900 901 if framework.ProviderIs("gce", "gke") { 902 // Do this as early as possible, which overrides the `defer` above. 
903 // This is mostly out of fear of leaking the IP in a timeout case 904 // (as of this writing we're not 100% sure where the leaks are 905 // coming from, so this is first-aid rather than surgery). 906 ginkgo.By("demoting the static IP to ephemeral") 907 if staticIPName != "" { 908 gceCloud, err := gce.GetGCECloud() 909 framework.ExpectNoError(err, "failed to get GCE cloud provider") 910 // Deleting it after it is attached "demotes" it to an 911 // ephemeral IP, which can be auto-released. 912 if err := gceCloud.DeleteRegionAddress(staticIPName, gceCloud.Region()); err != nil { 913 framework.Failf("failed to release static IP %s: %v", staticIPName, err) 914 } 915 staticIPName = "" 916 } 917 } 918 919 ginkgo.By("hitting the TCP service's LoadBalancer") 920 e2eservice.TestReachableHTTP(ctx, tcpIngressIP, svcPort, loadBalancerLagTimeout) 921 922 // Change the services' node ports. 923 924 ginkgo.By("adding a TCP service's NodePort") 925 tcpService, err = tcpJig.UpdateService(ctx, func(s *v1.Service) { 926 s.Spec.AllocateLoadBalancerNodePorts = utilpointer.BoolPtr(true) 927 }) 928 framework.ExpectNoError(err) 929 tcpNodePort := int(tcpService.Spec.Ports[0].NodePort) 930 if tcpNodePort == 0 { 931 framework.Failf("TCP Spec.Ports[0].NodePort (%d) not allocated", tcpNodePort) 932 } 933 if e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0]) != tcpIngressIP { 934 framework.Failf("TCP Status.LoadBalancer.Ingress changed (%s -> %s) when not expected", tcpIngressIP, e2eservice.GetIngressPoint(&tcpService.Status.LoadBalancer.Ingress[0])) 935 } 936 framework.Logf("TCP node port: %d", tcpNodePort) 937 938 ginkgo.By("hitting the TCP service's LoadBalancer") 939 e2eservice.TestReachableHTTP(ctx, tcpIngressIP, svcPort, loadBalancerLagTimeout) 940 }) 941 942 ginkgo.It("should be able to preserve UDP traffic when server pod cycles for a LoadBalancer service on different nodes", func(ctx context.Context) { 943 // requires cloud load-balancer support 944 e2eskipper.SkipUnlessProviderIs("gce", "gke", "azure") 945 ns := f.Namespace.Name 946 nodes, err := e2enode.GetBoundedReadySchedulableNodes(ctx, cs, 2) 947 framework.ExpectNoError(err) 948 if len(nodes.Items) < 2 { 949 e2eskipper.Skipf( 950 "Test requires >= 2 Ready nodes, but there are only %v nodes", 951 len(nodes.Items)) 952 } 953 954 loadBalancerLagTimeout := e2eservice.LoadBalancerLagTimeoutDefault 955 loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 956 957 // Create a LoadBalancer service 958 udpJig := e2eservice.NewTestJig(cs, ns, serviceName) 959 ginkgo.By("creating a UDP service " + serviceName + " with type=LoadBalancer in " + ns) 960 _, err = udpJig.CreateUDPService(ctx, func(svc *v1.Service) { 961 svc.Spec.Type = v1.ServiceTypeLoadBalancer 962 svc.Spec.Ports = []v1.ServicePort{ 963 {Port: 80, Name: "udp", Protocol: v1.ProtocolUDP, TargetPort: intstr.FromInt32(80)}, 964 } 965 }) 966 framework.ExpectNoError(err) 967 968 var udpIngressIP string 969 ginkgo.By("waiting for the UDP service to have a load balancer") 970 udpService, err := udpJig.WaitForLoadBalancer(ctx, loadBalancerCreateTimeout) 971 framework.ExpectNoError(err) 972 973 udpIngressIP = e2eservice.GetIngressPoint(&udpService.Status.LoadBalancer.Ingress[0]) 974 framework.Logf("UDP load balancer: %s", udpIngressIP) 975 976 // keep hitting the loadbalancer to check it fails over to the second pod 977 ginkgo.By("hitting the UDP service's LoadBalancer with same source port") 978 stopCh := make(chan struct{}) 979 defer close(stopCh) 980 
var mu sync.Mutex 981 hostnames := sets.NewString() 982 go func() { 983 defer ginkgo.GinkgoRecover() 984 port := int(udpService.Spec.Ports[0].Port) 985 laddr, err := net.ResolveUDPAddr("udp", ":54321") 986 if err != nil { 987 framework.Failf("Failed to resolve local address: %v", err) 988 } 989 raddr := net.UDPAddr{IP: netutils.ParseIPSloppy(udpIngressIP), Port: port} 990 991 for { 992 select { 993 case <-stopCh: 994 if len(hostnames) != 2 { 995 framework.Failf("Failed to hit the 2 UDP LoadBalancer backends successfully, got %v", hostnames.List()) 996 } 997 return 998 default: 999 time.Sleep(1 * time.Second) 1000 } 1001 1002 conn, err := net.DialUDP("udp", laddr, &raddr) 1003 if err != nil { 1004 framework.Logf("Failed to connect to: %s %d", udpIngressIP, port) 1005 continue 1006 } 1007 conn.SetDeadline(time.Now().Add(3 * time.Second)) 1008 framework.Logf("Connected successfully to: %s", raddr.String()) 1009 conn.Write([]byte("hostname\n")) 1010 buff := make([]byte, 1024) 1011 n, _, err := conn.ReadFrom(buff) 1012 if err == nil { 1013 mu.Lock() 1014 hostnames.Insert(string(buff[:n])) 1015 mu.Unlock() 1016 framework.Logf("Connected successfully to hostname: %s", string(buff[:n])) 1017 } 1018 conn.Close() 1019 } 1020 }() 1021 1022 // Add a backend pod to the service in one node 1023 ginkgo.By("creating a backend pod " + podBackend1 + " for the service " + serviceName) 1024 serverPod1 := e2epod.NewAgnhostPod(ns, podBackend1, nil, nil, nil, "netexec", fmt.Sprintf("--udp-port=%d", 80)) 1025 serverPod1.Labels = udpJig.Labels 1026 serverPod1.Spec.Hostname = "hostname1" 1027 nodeSelection := e2epod.NodeSelection{Name: nodes.Items[0].Name} 1028 e2epod.SetNodeSelection(&serverPod1.Spec, nodeSelection) 1029 e2epod.NewPodClient(f).CreateSync(ctx, serverPod1) 1030 1031 validateEndpointsPortsOrFail(ctx, cs, ns, serviceName, portsByPodName{podBackend1: {80}}) 1032 1033 // Note that the fact that the Endpoints object already exists does NOT mean 1034 // that iptables (or whatever else is used) was already programmed. 1035 // Additionally, take into account that the UDP conntrack entries timeout is 1036 // 30 seconds by default. 1037 // Based on the above, check that the pod receives the traffic.
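// The background client above keeps dialing from the same UDP source port (:54321), so the load balancer and conntrack should see a single flow; the poll below just watches the shared hostnames set (guarded by mu) until the first backend's hostname shows up.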
1038 ginkgo.By("checking client pod connected to the backend 1 on Node " + nodes.Items[0].Name) 1039 if err := wait.PollImmediate(1*time.Second, loadBalancerLagTimeout, func() (bool, error) { 1040 mu.Lock() 1041 defer mu.Unlock() 1042 return hostnames.Has(serverPod1.Spec.Hostname), nil 1043 }); err != nil { 1044 framework.Failf("Failed to connect to backend 1") 1045 } 1046 1047 // Create a second pod 1048 ginkgo.By("creating a second backend pod " + podBackend2 + " for the service " + serviceName) 1049 serverPod2 := e2epod.NewAgnhostPod(ns, podBackend2, nil, nil, nil, "netexec", fmt.Sprintf("--udp-port=%d", 80)) 1050 serverPod2.Labels = udpJig.Labels 1051 serverPod2.Spec.Hostname = "hostname2" 1052 nodeSelection = e2epod.NodeSelection{Name: nodes.Items[1].Name} 1053 e2epod.SetNodeSelection(&serverPod2.Spec, nodeSelection) 1054 e2epod.NewPodClient(f).CreateSync(ctx, serverPod2) 1055 1056 // and delete the first pod 1057 framework.Logf("Cleaning up %s pod", podBackend1) 1058 e2epod.NewPodClient(f).DeleteSync(ctx, podBackend1, metav1.DeleteOptions{}, e2epod.DefaultPodDeletionTimeout) 1059 1060 validateEndpointsPortsOrFail(ctx, cs, ns, serviceName, portsByPodName{podBackend2: {80}}) 1061 1062 // Check that the second pod keeps receiving traffic 1063 // UDP conntrack entries timeout is 30 sec by default 1064 ginkgo.By("checking client pod connected to the backend 2 on Node " + nodes.Items[1].Name) 1065 if err := wait.PollImmediate(1*time.Second, loadBalancerLagTimeout, func() (bool, error) { 1066 mu.Lock() 1067 defer mu.Unlock() 1068 return hostnames.Has(serverPod2.Spec.Hostname), nil 1069 }); err != nil { 1070 framework.Failf("Failed to connect to backend 2") 1071 } 1072 }) 1073 1074 ginkgo.It("should be able to preserve UDP traffic when server pod cycles for a LoadBalancer service on the same nodes", func(ctx context.Context) { 1075 // requires cloud load-balancer support 1076 e2eskipper.SkipUnlessProviderIs("gce", "gke", "azure") 1077 ns := f.Namespace.Name 1078 nodes, err := e2enode.GetBoundedReadySchedulableNodes(ctx, cs, 1) 1079 framework.ExpectNoError(err) 1080 if len(nodes.Items) < 1 { 1081 e2eskipper.Skipf( 1082 "Test requires >= 1 Ready nodes, but there are only %d nodes", 1083 len(nodes.Items)) 1084 } 1085 1086 loadBalancerLagTimeout := e2eservice.LoadBalancerLagTimeoutDefault 1087 loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 1088 1089 // Create a LoadBalancer service 1090 udpJig := e2eservice.NewTestJig(cs, ns, serviceName) 1091 ginkgo.By("creating a UDP service " + serviceName + " with type=LoadBalancer in " + ns) 1092 _, err = udpJig.CreateUDPService(ctx, func(svc *v1.Service) { 1093 svc.Spec.Type = v1.ServiceTypeLoadBalancer 1094 svc.Spec.Ports = []v1.ServicePort{ 1095 {Port: 80, Name: "udp", Protocol: v1.ProtocolUDP, TargetPort: intstr.FromInt32(80)}, 1096 } 1097 }) 1098 framework.ExpectNoError(err) 1099 1100 var udpIngressIP string 1101 ginkgo.By("waiting for the UDP service to have a load balancer") 1102 udpService, err := udpJig.WaitForLoadBalancer(ctx, loadBalancerCreateTimeout) 1103 framework.ExpectNoError(err) 1104 1105 udpIngressIP = e2eservice.GetIngressPoint(&udpService.Status.LoadBalancer.Ingress[0]) 1106 framework.Logf("UDP load balancer: %s", udpIngressIP) 1107 1108 // keep hitting the loadbalancer to check it fails over to the second pod 1109 ginkgo.By("hitting the UDP service's LoadBalancer with same source port") 1110 stopCh := make(chan struct{}) 1111 defer close(stopCh) 1112 var mu sync.Mutex 1113 hostnames := 
sets.NewString() 1114 go func() { 1115 defer ginkgo.GinkgoRecover() 1116 port := int(udpService.Spec.Ports[0].Port) 1117 laddr, err := net.ResolveUDPAddr("udp", ":54322") 1118 if err != nil { 1119 framework.Failf("Failed to resolve local address: %v", err) 1120 } 1121 raddr := net.UDPAddr{IP: netutils.ParseIPSloppy(udpIngressIP), Port: port} 1122 1123 for { 1124 select { 1125 case <-stopCh: 1126 if len(hostnames) != 2 { 1127 framework.Failf("Failed to hit the 2 UDP LoadBalancer backends successfully, got %v", hostnames.List()) 1128 } 1129 return 1130 default: 1131 time.Sleep(1 * time.Second) 1132 } 1133 1134 conn, err := net.DialUDP("udp", laddr, &raddr) 1135 if err != nil { 1136 framework.Logf("Failed to connect to: %s %d", udpIngressIP, port) 1137 continue 1138 } 1139 conn.SetDeadline(time.Now().Add(3 * time.Second)) 1140 framework.Logf("Connected successfully to: %s", raddr.String()) 1141 conn.Write([]byte("hostname\n")) 1142 buff := make([]byte, 1024) 1143 n, _, err := conn.ReadFrom(buff) 1144 if err == nil { 1145 mu.Lock() 1146 hostnames.Insert(string(buff[:n])) 1147 mu.Unlock() 1148 framework.Logf("Connected successfully to hostname: %s", string(buff[:n])) 1149 } 1150 conn.Close() 1151 } 1152 }() 1153 1154 // Add a backend pod to the service in one node 1155 ginkgo.By("creating a backend pod " + podBackend1 + " for the service " + serviceName) 1156 serverPod1 := e2epod.NewAgnhostPod(ns, podBackend1, nil, nil, nil, "netexec", fmt.Sprintf("--udp-port=%d", 80)) 1157 serverPod1.Labels = udpJig.Labels 1158 serverPod1.Spec.Hostname = "hostname1" 1159 nodeSelection := e2epod.NodeSelection{Name: nodes.Items[0].Name} 1160 e2epod.SetNodeSelection(&serverPod1.Spec, nodeSelection) 1161 e2epod.NewPodClient(f).CreateSync(ctx, serverPod1) 1162 1163 validateEndpointsPortsOrFail(ctx, cs, ns, serviceName, portsByPodName{podBackend1: {80}}) 1164 1165 // Note that the fact that the Endpoints object already exists does NOT mean 1166 // that iptables (or whatever else is used) was already programmed. 1167 // Additionally, take into account that the UDP conntrack entries timeout is 1168 // 30 seconds by default. 1169 // Based on the above, check that the pod receives the traffic.
1170 ginkgo.By("checking client pod connected to the backend 1 on Node " + nodes.Items[0].Name) 1171 if err := wait.PollImmediate(1*time.Second, loadBalancerLagTimeout, func() (bool, error) { 1172 mu.Lock() 1173 defer mu.Unlock() 1174 return hostnames.Has(serverPod1.Spec.Hostname), nil 1175 }); err != nil { 1176 framework.Failf("Failed to connect to backend 1") 1177 } 1178 1179 // Create a second pod on the same node 1180 ginkgo.By("creating a second backend pod " + podBackend2 + " for the service " + serviceName) 1181 serverPod2 := e2epod.NewAgnhostPod(ns, podBackend2, nil, nil, nil, "netexec", fmt.Sprintf("--udp-port=%d", 80)) 1182 serverPod2.Labels = udpJig.Labels 1183 serverPod2.Spec.Hostname = "hostname2" 1184 // use the same node as previous pod 1185 e2epod.SetNodeSelection(&serverPod2.Spec, nodeSelection) 1186 e2epod.NewPodClient(f).CreateSync(ctx, serverPod2) 1187 1188 // and delete the first pod 1189 framework.Logf("Cleaning up %s pod", podBackend1) 1190 e2epod.NewPodClient(f).DeleteSync(ctx, podBackend1, metav1.DeleteOptions{}, e2epod.DefaultPodDeletionTimeout) 1191 1192 validateEndpointsPortsOrFail(ctx, cs, ns, serviceName, portsByPodName{podBackend2: {80}}) 1193 1194 // Check that the second pod keeps receiving traffic 1195 // UDP conntrack entries timeout is 30 sec by default 1196 ginkgo.By("checking client pod connected to the backend 2 on Node " + nodes.Items[0].Name) 1197 if err := wait.PollImmediate(1*time.Second, loadBalancerLagTimeout, func() (bool, error) { 1198 mu.Lock() 1199 defer mu.Unlock() 1200 return hostnames.Has(serverPod2.Spec.Hostname), nil 1201 }); err != nil { 1202 framework.Failf("Failed to connect to backend 2") 1203 } 1204 }) 1205 1206 f.It("should not have connectivity disruption during rolling update with externalTrafficPolicy=Cluster", f.WithSlow(), func(ctx context.Context) { 1207 // We start with a low but reasonable threshold to analyze the results. 1208 // The goal is to achieve 99% minimum success rate. 1209 // TODO: We should do incremental steps toward the goal. 1210 minSuccessRate := 0.95 1211 1212 testRollingUpdateLBConnectivityDisruption(ctx, f, v1.ServiceExternalTrafficPolicyTypeCluster, minSuccessRate) 1213 }) 1214 1215 f.It("should not have connectivity disruption during rolling update with externalTrafficPolicy=Local", f.WithSlow(), func(ctx context.Context) { 1216 // We start with a low but reasonable threshold to analyze the results. 1217 // The goal is to achieve 99% minimum success rate. 1218 // TODO: We should do incremental steps toward the goal. 
1219 minSuccessRate := 0.95 1220 1221 testRollingUpdateLBConnectivityDisruption(ctx, f, v1.ServiceExternalTrafficPolicyTypeLocal, minSuccessRate) 1222 }) 1223 }) 1224 1225 var _ = common.SIGDescribe("LoadBalancers ESIPP", framework.WithSlow(), func() { 1226 f := framework.NewDefaultFramework("esipp") 1227 f.NamespacePodSecurityLevel = admissionapi.LevelBaseline 1228 var loadBalancerCreateTimeout time.Duration 1229 1230 var cs clientset.Interface 1231 var subnetPrefix *net.IPNet 1232 var err error 1233 1234 ginkgo.BeforeEach(func(ctx context.Context) { 1235 // requires cloud load-balancer support - this feature is currently supported only on GCE/GKE 1236 e2eskipper.SkipUnlessProviderIs("gce", "gke") 1237 1238 cs = f.ClientSet 1239 loadBalancerCreateTimeout = e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 1240 subnetPrefix, err = getSubnetPrefix(ctx, cs) 1241 framework.ExpectNoError(err) 1242 }) 1243 1244 ginkgo.AfterEach(func(ctx context.Context) { 1245 if ginkgo.CurrentSpecReport().Failed() { 1246 DescribeSvc(f.Namespace.Name) 1247 } 1248 }) 1249 1250 ginkgo.It("should work for type=LoadBalancer", func(ctx context.Context) { 1251 namespace := f.Namespace.Name 1252 serviceName := "external-local-lb" 1253 jig := e2eservice.NewTestJig(cs, namespace, serviceName) 1254 1255 svc, err := jig.CreateOnlyLocalLoadBalancerService(ctx, loadBalancerCreateTimeout, true, nil) 1256 framework.ExpectNoError(err) 1257 healthCheckNodePort := int(svc.Spec.HealthCheckNodePort) 1258 if healthCheckNodePort == 0 { 1259 framework.Failf("Service HealthCheck NodePort was not allocated") 1260 } 1261 ginkgo.DeferCleanup(func(ctx context.Context) { 1262 err = jig.ChangeServiceType(ctx, v1.ServiceTypeClusterIP, loadBalancerCreateTimeout) 1263 framework.ExpectNoError(err) 1264 1265 // Make sure we didn't leak the health check node port.
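// Once the service is back to ClusterIP, the HealthCheck NodePort should stop answering on every endpoint node; the cleanup below probes it from a test container and expects the probe to fail (that is what the false argument appears to request) within the given threshold of attempts.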
1266 const threshold = 2 1267 nodes, err := getEndpointNodesWithInternalIP(ctx, jig) 1268 framework.ExpectNoError(err) 1269 config := e2enetwork.NewNetworkingTestConfig(ctx, f) 1270 for _, internalIP := range nodes { 1271 err := testHTTPHealthCheckNodePortFromTestContainer(ctx, 1272 config, 1273 internalIP, 1274 healthCheckNodePort, 1275 e2eservice.KubeProxyLagTimeout, 1276 false, 1277 threshold) 1278 framework.ExpectNoError(err) 1279 } 1280 err = cs.CoreV1().Services(svc.Namespace).Delete(ctx, svc.Name, metav1.DeleteOptions{}) 1281 framework.ExpectNoError(err) 1282 }) 1283 1284 svcTCPPort := int(svc.Spec.Ports[0].Port) 1285 ingressIP := e2eservice.GetIngressPoint(&svc.Status.LoadBalancer.Ingress[0]) 1286 1287 ginkgo.By("reading clientIP using the TCP service's service port via its external VIP") 1288 clientIPPort, err := GetHTTPContent(ingressIP, svcTCPPort, e2eservice.KubeProxyLagTimeout, "/clientip") 1289 framework.ExpectNoError(err) 1290 framework.Logf("ClientIP detected by target pod using VIP:SvcPort is %s", clientIPPort) 1291 1292 ginkgo.By("checking if Source IP is preserved") 1293 // The clientIPPort returned from GetHTTPContent is in this format: x.x.x.x:port or [xx:xx:xx::x]:port 1294 host, _, err := net.SplitHostPort(clientIPPort) 1295 if err != nil { 1296 framework.Failf("SplitHostPort returned unexpected error: %q", clientIPPort) 1297 } 1298 ip := netutils.ParseIPSloppy(host) 1299 if ip == nil { 1300 framework.Failf("Invalid client IP address format: %q", host) 1301 } 1302 if subnetPrefix.Contains(ip) { 1303 framework.Failf("Source IP was NOT preserved") 1304 } 1305 }) 1306 1307 ginkgo.It("should work for type=NodePort", func(ctx context.Context) { 1308 namespace := f.Namespace.Name 1309 serviceName := "external-local-nodeport" 1310 jig := e2eservice.NewTestJig(cs, namespace, serviceName) 1311 1312 svc, err := jig.CreateOnlyLocalNodePortService(ctx, true) 1313 framework.ExpectNoError(err) 1314 ginkgo.DeferCleanup(func(ctx context.Context) { 1315 err := cs.CoreV1().Services(svc.Namespace).Delete(ctx, svc.Name, metav1.DeleteOptions{}) 1316 framework.ExpectNoError(err) 1317 }) 1318 1319 tcpNodePort := int(svc.Spec.Ports[0].NodePort) 1320 1321 endpointsNodeMap, err := getEndpointNodesWithInternalIP(ctx, jig) 1322 framework.ExpectNoError(err) 1323 1324 dialCmd := "clientip" 1325 config := e2enetwork.NewNetworkingTestConfig(ctx, f) 1326 1327 for nodeName, nodeIP := range endpointsNodeMap { 1328 ginkgo.By(fmt.Sprintf("reading clientIP using the TCP service's NodePort, on node %v: %v:%v/%v", nodeName, nodeIP, tcpNodePort, dialCmd)) 1329 clientIP, err := GetHTTPContentFromTestContainer(ctx, config, nodeIP, tcpNodePort, e2eservice.KubeProxyLagTimeout, dialCmd) 1330 framework.ExpectNoError(err) 1331 framework.Logf("ClientIP detected by target pod using NodePort is %s, the ip of test container is %s", clientIP, config.TestContainerPod.Status.PodIP) 1332 // the clientIP returned by agnhost contains port 1333 if !strings.HasPrefix(clientIP, config.TestContainerPod.Status.PodIP) { 1334 framework.Failf("Source IP was NOT preserved") 1335 } 1336 } 1337 }) 1338 1339 ginkgo.It("should only target nodes with endpoints", func(ctx context.Context) { 1340 namespace := f.Namespace.Name 1341 serviceName := "external-local-nodes" 1342 jig := e2eservice.NewTestJig(cs, namespace, serviceName) 1343 nodes, err := e2enode.GetBoundedReadySchedulableNodes(ctx, cs, e2eservice.MaxNodesForEndpointsTests) 1344 framework.ExpectNoError(err) 1345 1346 svc, err := jig.CreateOnlyLocalLoadBalancerService(ctx, 
loadBalancerCreateTimeout, false, 1347 func(svc *v1.Service) { 1348 // Change service port to avoid collision with opened hostPorts 1349 // in other tests that run in parallel. 1350 if len(svc.Spec.Ports) != 0 { 1351 svc.Spec.Ports[0].TargetPort = intstr.FromInt32(svc.Spec.Ports[0].Port) 1352 svc.Spec.Ports[0].Port = 8081 1353 } 1354 1355 }) 1356 framework.ExpectNoError(err) 1357 ginkgo.DeferCleanup(func(ctx context.Context) { 1358 err = jig.ChangeServiceType(ctx, v1.ServiceTypeClusterIP, loadBalancerCreateTimeout) 1359 framework.ExpectNoError(err) 1360 err := cs.CoreV1().Services(svc.Namespace).Delete(ctx, svc.Name, metav1.DeleteOptions{}) 1361 framework.ExpectNoError(err) 1362 }) 1363 1364 healthCheckNodePort := int(svc.Spec.HealthCheckNodePort) 1365 if healthCheckNodePort == 0 { 1366 framework.Failf("Service HealthCheck NodePort was not allocated") 1367 } 1368 1369 ips := e2enode.CollectAddresses(nodes, v1.NodeInternalIP) 1370 1371 ingressIP := e2eservice.GetIngressPoint(&svc.Status.LoadBalancer.Ingress[0]) 1372 svcTCPPort := int(svc.Spec.Ports[0].Port) 1373 1374 const threshold = 2 1375 config := e2enetwork.NewNetworkingTestConfig(ctx, f) 1376 for i := 0; i < len(nodes.Items); i++ { 1377 endpointNodeName := nodes.Items[i].Name 1378 1379 ginkgo.By("creating a pod to be part of the service " + serviceName + " on node " + endpointNodeName) 1380 _, err = jig.Run(ctx, func(rc *v1.ReplicationController) { 1381 rc.Name = serviceName 1382 if endpointNodeName != "" { 1383 rc.Spec.Template.Spec.NodeName = endpointNodeName 1384 } 1385 }) 1386 framework.ExpectNoError(err) 1387 1388 ginkgo.By(fmt.Sprintf("waiting for service endpoint on node %v", endpointNodeName)) 1389 err = jig.WaitForEndpointOnNode(ctx, endpointNodeName) 1390 framework.ExpectNoError(err) 1391 1392 // HealthCheck should pass only on the node where num(endpoints) > 0 1393 // All other nodes should fail the healthcheck on the service healthCheckNodePort 1394 for n, internalIP := range ips { 1395 // Make sure the loadbalancer picked up the health check change. 1396 // Confirm traffic can reach backend through LB before checking healthcheck nodeport. 
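			// This loop assumes ips[n] (from e2enode.CollectAddresses) corresponds to nodes.Items[n],
			// so internalIP can be mapped back to the node name when deciding expectedSuccess.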
1397 e2eservice.TestReachableHTTP(ctx, ingressIP, svcTCPPort, e2eservice.KubeProxyLagTimeout) 1398 expectedSuccess := nodes.Items[n].Name == endpointNodeName 1399 port := strconv.Itoa(healthCheckNodePort) 1400 ipPort := net.JoinHostPort(internalIP, port) 1401 framework.Logf("Health checking %s, http://%s/healthz, expectedSuccess %v", nodes.Items[n].Name, ipPort, expectedSuccess) 1402 err := testHTTPHealthCheckNodePortFromTestContainer(ctx, 1403 config, 1404 internalIP, 1405 healthCheckNodePort, 1406 e2eservice.KubeProxyEndpointLagTimeout, 1407 expectedSuccess, 1408 threshold) 1409 framework.ExpectNoError(err) 1410 } 1411 framework.ExpectNoError(e2erc.DeleteRCAndWaitForGC(ctx, f.ClientSet, namespace, serviceName)) 1412 } 1413 }) 1414 1415 ginkgo.It("should work from pods", func(ctx context.Context) { 1416 var err error 1417 namespace := f.Namespace.Name 1418 serviceName := "external-local-pods" 1419 jig := e2eservice.NewTestJig(cs, namespace, serviceName) 1420 1421 svc, err := jig.CreateOnlyLocalLoadBalancerService(ctx, loadBalancerCreateTimeout, true, nil) 1422 framework.ExpectNoError(err) 1423 ginkgo.DeferCleanup(func(ctx context.Context) { 1424 err = jig.ChangeServiceType(ctx, v1.ServiceTypeClusterIP, loadBalancerCreateTimeout) 1425 framework.ExpectNoError(err) 1426 err := cs.CoreV1().Services(svc.Namespace).Delete(ctx, svc.Name, metav1.DeleteOptions{}) 1427 framework.ExpectNoError(err) 1428 }) 1429 1430 ingressIP := e2eservice.GetIngressPoint(&svc.Status.LoadBalancer.Ingress[0]) 1431 port := strconv.Itoa(int(svc.Spec.Ports[0].Port)) 1432 ipPort := net.JoinHostPort(ingressIP, port) 1433 path := fmt.Sprintf("%s/clientip", ipPort) 1434 1435 ginkgo.By("Creating pause pod deployment to make sure, pausePods are in desired state") 1436 deployment := createPausePodDeployment(ctx, cs, "pause-pod-deployment", namespace, 1) 1437 framework.ExpectNoError(e2edeployment.WaitForDeploymentComplete(cs, deployment), "Failed to complete pause pod deployment") 1438 1439 ginkgo.DeferCleanup(func(ctx context.Context) { 1440 framework.Logf("Deleting deployment") 1441 err = cs.AppsV1().Deployments(namespace).Delete(ctx, deployment.Name, metav1.DeleteOptions{}) 1442 framework.ExpectNoError(err, "Failed to delete deployment %s", deployment.Name) 1443 }) 1444 1445 deployment, err = cs.AppsV1().Deployments(namespace).Get(ctx, deployment.Name, metav1.GetOptions{}) 1446 framework.ExpectNoError(err, "Error in retrieving pause pod deployment") 1447 labelSelector, err := metav1.LabelSelectorAsSelector(deployment.Spec.Selector) 1448 framework.ExpectNoError(err, "Error in setting LabelSelector as selector from deployment") 1449 1450 pausePods, err := cs.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: labelSelector.String()}) 1451 framework.ExpectNoError(err, "Error in listing pods associated with pause pod deployments") 1452 1453 pausePod := pausePods.Items[0] 1454 framework.Logf("Waiting up to %v curl %v", e2eservice.KubeProxyLagTimeout, path) 1455 cmd := fmt.Sprintf(`curl -q -s --connect-timeout 30 %v`, path) 1456 1457 var srcIP string 1458 loadBalancerPropagationTimeout := e2eservice.GetServiceLoadBalancerPropagationTimeout(ctx, cs) 1459 ginkgo.By(fmt.Sprintf("Hitting external lb %v from pod %v on node %v", ingressIP, pausePod.Name, pausePod.Spec.NodeName)) 1460 if pollErr := wait.PollImmediate(framework.Poll, loadBalancerPropagationTimeout, func() (bool, error) { 1461 stdout, err := e2eoutput.RunHostCmd(pausePod.Namespace, pausePod.Name, cmd) 1462 if err != nil { 1463 framework.Logf("got err: %v, 
retry until timeout", err)
1464 				return false, nil
1465 			}
1466 			srcIP = strings.TrimSpace(strings.Split(stdout, ":")[0])
1467 			return srcIP == pausePod.Status.PodIP, nil
1468 		}); pollErr != nil {
1469 			framework.Failf("Source IP not preserved from %v, expected '%v' got '%v'", pausePod.Name, pausePod.Status.PodIP, srcIP)
1470 		}
1471 	})
1472 
1473 	ginkgo.It("should handle updates to ExternalTrafficPolicy field", func(ctx context.Context) {
1474 		namespace := f.Namespace.Name
1475 		serviceName := "external-local-update"
1476 		jig := e2eservice.NewTestJig(cs, namespace, serviceName)
1477 
1478 		nodes, err := e2enode.GetBoundedReadySchedulableNodes(ctx, cs, e2eservice.MaxNodesForEndpointsTests)
1479 		framework.ExpectNoError(err)
1480 		if len(nodes.Items) < 2 {
1481 			framework.Failf("Need at least 2 nodes to verify source ip from a node without endpoint")
1482 		}
1483 
1484 		svc, err := jig.CreateOnlyLocalLoadBalancerService(ctx, loadBalancerCreateTimeout, true, nil)
1485 		framework.ExpectNoError(err)
1486 		ginkgo.DeferCleanup(func(ctx context.Context) {
1487 			err = jig.ChangeServiceType(ctx, v1.ServiceTypeClusterIP, loadBalancerCreateTimeout)
1488 			framework.ExpectNoError(err)
1489 			err := cs.CoreV1().Services(svc.Namespace).Delete(ctx, svc.Name, metav1.DeleteOptions{})
1490 			framework.ExpectNoError(err)
1491 		})
1492 
1493 		// save the health check node port because it disappears when ESIPP is turned off.
1494 		healthCheckNodePort := int(svc.Spec.HealthCheckNodePort)
1495 
1496 		ginkgo.By("turning ESIPP off")
1497 		svc, err = jig.UpdateService(ctx, func(svc *v1.Service) {
1498 			svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyCluster
1499 		})
1500 		framework.ExpectNoError(err)
1501 		if svc.Spec.HealthCheckNodePort > 0 {
1502 			framework.Failf("Service HealthCheck NodePort still present")
1503 		}
1504 
1505 		epNodes, err := jig.ListNodesWithEndpoint(ctx)
1506 		framework.ExpectNoError(err)
1507 		// map from name of nodes with endpoint to internal ip
1508 		// it is assumed that there is only a single node with the endpoint
1509 		endpointNodeMap := make(map[string]string)
1510 		// map from name of nodes without endpoint to internal ip
1511 		noEndpointNodeMap := make(map[string]string)
1512 		for _, node := range epNodes {
1513 			ips := e2enode.GetAddresses(&node, v1.NodeInternalIP)
1514 			if len(ips) < 1 {
1515 				framework.Failf("No internal ip found for node %s", node.Name)
1516 			}
1517 			endpointNodeMap[node.Name] = ips[0]
1518 		}
1519 		for _, n := range nodes.Items {
1520 			ips := e2enode.GetAddresses(&n, v1.NodeInternalIP)
1521 			if len(ips) < 1 {
1522 				framework.Failf("No internal ip found for node %s", n.Name)
1523 			}
1524 			if _, ok := endpointNodeMap[n.Name]; !ok {
1525 				noEndpointNodeMap[n.Name] = ips[0]
1526 			}
1527 		}
1528 		gomega.Expect(endpointNodeMap).ToNot(gomega.BeEmpty())
1529 		gomega.Expect(noEndpointNodeMap).ToNot(gomega.BeEmpty())
1530 
1531 		svcTCPPort := int(svc.Spec.Ports[0].Port)
1532 		svcNodePort := int(svc.Spec.Ports[0].NodePort)
1533 		ingressIP := e2eservice.GetIngressPoint(&svc.Status.LoadBalancer.Ingress[0])
1534 		path := "/clientip"
1535 		dialCmd := "clientip"
1536 
1537 		config := e2enetwork.NewNetworkingTestConfig(ctx, f)
1538 
1539 		ginkgo.By(fmt.Sprintf("endpoints present on nodes %v, absent on nodes %v", endpointNodeMap, noEndpointNodeMap))
1540 		for nodeName, nodeIP := range noEndpointNodeMap {
1541 			ginkgo.By(fmt.Sprintf("Checking %v (%v:%v/%v) proxies to endpoints on another node", nodeName, nodeIP, svcNodePort, dialCmd))
1542 			_, err := GetHTTPContentFromTestContainer(ctx, config, nodeIP, svcNodePort,
e2eservice.KubeProxyLagTimeout, dialCmd) 1543 framework.ExpectNoError(err, "Could not reach HTTP service through %v:%v/%v after %v", nodeIP, svcNodePort, dialCmd, e2eservice.KubeProxyLagTimeout) 1544 } 1545 1546 for nodeName, nodeIP := range endpointNodeMap { 1547 ginkgo.By(fmt.Sprintf("checking kube-proxy health check fails on node with endpoint (%s), public IP %s", nodeName, nodeIP)) 1548 var body string 1549 pollFn := func() (bool, error) { 1550 // we expect connection failure here, but not other errors 1551 resp, err := config.GetResponseFromTestContainer(ctx, 1552 "http", 1553 "healthz", 1554 nodeIP, 1555 healthCheckNodePort) 1556 if err != nil { 1557 return false, nil 1558 } 1559 if len(resp.Errors) > 0 { 1560 return true, nil 1561 } 1562 if len(resp.Responses) > 0 { 1563 body = resp.Responses[0] 1564 } 1565 return false, nil 1566 } 1567 if pollErr := wait.PollImmediate(framework.Poll, e2eservice.TestTimeout, pollFn); pollErr != nil { 1568 framework.Failf("Kube-proxy still exposing health check on node %v:%v, after ESIPP was turned off. body %s", 1569 nodeName, healthCheckNodePort, body) 1570 } 1571 } 1572 1573 // Poll till kube-proxy re-adds the MASQUERADE rule on the node. 1574 ginkgo.By(fmt.Sprintf("checking source ip is NOT preserved through loadbalancer %v", ingressIP)) 1575 var clientIP string 1576 pollErr := wait.PollImmediate(framework.Poll, 3*e2eservice.KubeProxyLagTimeout, func() (bool, error) { 1577 clientIPPort, err := GetHTTPContent(ingressIP, svcTCPPort, e2eservice.KubeProxyLagTimeout, path) 1578 if err != nil { 1579 return false, nil 1580 } 1581 // The clientIPPort returned from GetHTTPContent is in this format: x.x.x.x:port or [xx:xx:xx::x]:port 1582 host, _, err := net.SplitHostPort(clientIPPort) 1583 if err != nil { 1584 framework.Logf("SplitHostPort returned unexpected error: %q", clientIPPort) 1585 return false, nil 1586 } 1587 ip := netutils.ParseIPSloppy(host) 1588 if ip == nil { 1589 framework.Logf("Invalid client IP address format: %q", host) 1590 return false, nil 1591 } 1592 if subnetPrefix.Contains(ip) { 1593 return true, nil 1594 } 1595 return false, nil 1596 }) 1597 if pollErr != nil { 1598 framework.Failf("Source IP WAS preserved even after ESIPP turned off. Got %v, expected a ten-dot cluster ip.", clientIP) 1599 } 1600 1601 // TODO: We need to attempt to create another service with the previously 1602 // allocated healthcheck nodePort. If the health check nodePort has been 1603 // freed, the new service creation will succeed, upon which we cleanup. 1604 // If the health check nodePort has NOT been freed, the new service 1605 // creation will fail. 
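		// At this point the Service still runs with externalTrafficPolicy=Cluster, so backends see
		// the masqueraded node IP instead of the original client IP; the next step switches the
		// policy back to Local and re-requests the saved health check node port.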
1606 1607 ginkgo.By("setting ExternalTraffic field back to OnlyLocal") 1608 svc, err = jig.UpdateService(ctx, func(svc *v1.Service) { 1609 svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyLocal 1610 // Request the same healthCheckNodePort as before, to test the user-requested allocation path 1611 svc.Spec.HealthCheckNodePort = int32(healthCheckNodePort) 1612 }) 1613 framework.ExpectNoError(err) 1614 loadBalancerPropagationTimeout := e2eservice.GetServiceLoadBalancerPropagationTimeout(ctx, cs) 1615 pollErr = wait.PollImmediate(framework.PollShortTimeout, loadBalancerPropagationTimeout, func() (bool, error) { 1616 clientIPPort, err := GetHTTPContent(ingressIP, svcTCPPort, e2eservice.KubeProxyLagTimeout, path) 1617 if err != nil { 1618 return false, nil 1619 } 1620 ginkgo.By(fmt.Sprintf("Endpoint %v:%v%v returned client ip %v", ingressIP, svcTCPPort, path, clientIPPort)) 1621 // The clientIPPort returned from GetHTTPContent is in this format: x.x.x.x:port or [xx:xx:xx::x]:port 1622 host, _, err := net.SplitHostPort(clientIPPort) 1623 if err != nil { 1624 framework.Logf("SplitHostPort returned unexpected error: %q", clientIPPort) 1625 return false, nil 1626 } 1627 ip := netutils.ParseIPSloppy(host) 1628 if ip == nil { 1629 framework.Logf("Invalid client IP address format: %q", host) 1630 return false, nil 1631 } 1632 if !subnetPrefix.Contains(ip) { 1633 return true, nil 1634 } 1635 return false, nil 1636 }) 1637 if pollErr != nil { 1638 framework.Failf("Source IP (%v) is not the client IP even after ESIPP turned on, expected a public IP.", clientIP) 1639 } 1640 }) 1641 }) 1642 1643 func testRollingUpdateLBConnectivityDisruption(ctx context.Context, f *framework.Framework, externalTrafficPolicy v1.ServiceExternalTrafficPolicyType, minSuccessRate float64) { 1644 cs := f.ClientSet 1645 ns := f.Namespace.Name 1646 name := "test-lb-rolling-update" 1647 labels := map[string]string{"name": name} 1648 gracePeriod := int64(60) 1649 maxUnavailable := intstr.FromString("10%") 1650 ds := e2edaemonset.NewDaemonSet(name, e2eapps.AgnhostImage, labels, nil, nil, 1651 []v1.ContainerPort{ 1652 {ContainerPort: 80}, 1653 }, 1654 "netexec", "--http-port=80", fmt.Sprintf("--delay-shutdown=%d", gracePeriod), 1655 ) 1656 ds.Spec.UpdateStrategy = appsv1.DaemonSetUpdateStrategy{ 1657 Type: appsv1.RollingUpdateDaemonSetStrategyType, 1658 RollingUpdate: &appsv1.RollingUpdateDaemonSet{ 1659 MaxUnavailable: &maxUnavailable, 1660 }, 1661 } 1662 ds.Spec.Template.Labels = labels 1663 ds.Spec.Template.Spec.TerminationGracePeriodSeconds = utilpointer.Int64(gracePeriod) 1664 1665 nodeNames := e2edaemonset.SchedulableNodes(ctx, cs, ds) 1666 e2eskipper.SkipUnlessAtLeast(len(nodeNames), 2, "load-balancer rolling update test requires at least 2 schedulable nodes for the DaemonSet") 1667 if len(nodeNames) > 25 { 1668 e2eskipper.Skipf("load-balancer rolling update test skipped for large environments with more than 25 nodes") 1669 } 1670 1671 ginkgo.By(fmt.Sprintf("Creating DaemonSet %q", name)) 1672 ds, err := cs.AppsV1().DaemonSets(ns).Create(context.TODO(), ds, metav1.CreateOptions{}) 1673 framework.ExpectNoError(err) 1674 1675 ginkgo.By("Checking that daemon pods launch on every schedulable node of the cluster") 1676 creationTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs) 1677 err = wait.PollUntilContextTimeout(ctx, framework.Poll, creationTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, nodeNames)) 1678 framework.ExpectNoError(err, "error waiting for daemon pods to start") 1679 err = 
e2edaemonset.CheckDaemonStatus(ctx, f, name) 1680 framework.ExpectNoError(err) 1681 1682 ginkgo.By(fmt.Sprintf("Creating a service %s with type=LoadBalancer externalTrafficPolicy=%s in namespace %s", name, externalTrafficPolicy, ns)) 1683 jig := e2eservice.NewTestJig(cs, ns, name) 1684 jig.Labels = labels 1685 service, err := jig.CreateLoadBalancerService(ctx, creationTimeout, func(svc *v1.Service) { 1686 svc.Spec.ExternalTrafficPolicy = externalTrafficPolicy 1687 }) 1688 framework.ExpectNoError(err) 1689 1690 lbNameOrAddress := e2eservice.GetIngressPoint(&service.Status.LoadBalancer.Ingress[0]) 1691 svcPort := int(service.Spec.Ports[0].Port) 1692 1693 ginkgo.By("Hitting the DaemonSet's pods through the service's load balancer") 1694 timeout := e2eservice.LoadBalancerLagTimeoutDefault 1695 if framework.ProviderIs("aws") { 1696 timeout = e2eservice.LoadBalancerLagTimeoutAWS 1697 } 1698 e2eservice.TestReachableHTTP(ctx, lbNameOrAddress, svcPort, timeout) 1699 1700 ginkgo.By("Starting a goroutine to continuously hit the DaemonSet's pods through the service's load balancer") 1701 var totalRequests uint64 = 0 1702 var networkErrors uint64 = 0 1703 var httpErrors uint64 = 0 1704 done := make(chan struct{}) 1705 defer close(done) 1706 go func() { 1707 defer ginkgo.GinkgoRecover() 1708 1709 wait.Until(func() { 1710 atomic.AddUint64(&totalRequests, 1) 1711 client := &http.Client{ 1712 Transport: utilnet.SetTransportDefaults(&http.Transport{ 1713 DisableKeepAlives: true, 1714 }), 1715 Timeout: 5 * time.Second, 1716 } 1717 ipPort := net.JoinHostPort(lbNameOrAddress, strconv.Itoa(svcPort)) 1718 msg := "hello" 1719 url := fmt.Sprintf("http://%s/echo?msg=%s", ipPort, msg) 1720 resp, err := client.Get(url) 1721 if err != nil { 1722 framework.Logf("Got error testing for reachability of %s: %v", url, err) 1723 atomic.AddUint64(&networkErrors, 1) 1724 return 1725 } 1726 defer resp.Body.Close() 1727 if resp.StatusCode != http.StatusOK { 1728 framework.Logf("Got bad status code: %d", resp.StatusCode) 1729 atomic.AddUint64(&httpErrors, 1) 1730 return 1731 } 1732 body, err := io.ReadAll(resp.Body) 1733 if err != nil { 1734 framework.Logf("Got error reading HTTP body: %v", err) 1735 atomic.AddUint64(&httpErrors, 1) 1736 return 1737 } 1738 if string(body) != msg { 1739 framework.Logf("The response body does not contain expected string %s", string(body)) 1740 atomic.AddUint64(&httpErrors, 1) 1741 return 1742 } 1743 }, time.Duration(0), done) 1744 }() 1745 1746 ginkgo.By("Triggering DaemonSet rolling update several times") 1747 var previousTotalRequests uint64 = 0 1748 var previousNetworkErrors uint64 = 0 1749 var previousHttpErrors uint64 = 0 1750 for i := 1; i <= 5; i++ { 1751 framework.Logf("Update daemon pods environment: [{\"name\":\"VERSION\",\"value\":\"%d\"}]", i) 1752 patch := fmt.Sprintf(`{"spec":{"template":{"spec":{"containers":[{"name":"%s","env":[{"name":"VERSION","value":"%d"}]}]}}}}`, ds.Spec.Template.Spec.Containers[0].Name, i) 1753 ds, err = cs.AppsV1().DaemonSets(ns).Patch(context.TODO(), name, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{}) 1754 framework.ExpectNoError(err) 1755 1756 framework.Logf("Check that daemon pods are available on every node of the cluster with the updated environment.") 1757 err = wait.PollImmediate(framework.Poll, creationTimeout, func() (bool, error) { 1758 podList, err := cs.CoreV1().Pods(ds.Namespace).List(context.TODO(), metav1.ListOptions{}) 1759 if err != nil { 1760 return false, err 1761 } 1762 pods := podList.Items 1763 1764 readyPods := 0 
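			// Count only pods that are owned by this DaemonSet, are not terminating, run the
			// updated VERSION value, and have been available for at least MinReadySeconds.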
1765 for _, pod := range pods { 1766 if !metav1.IsControlledBy(&pod, ds) { 1767 continue 1768 } 1769 if pod.DeletionTimestamp != nil { 1770 continue 1771 } 1772 podVersion := "" 1773 for _, env := range pod.Spec.Containers[0].Env { 1774 if env.Name == "VERSION" { 1775 podVersion = env.Value 1776 break 1777 } 1778 } 1779 if podVersion != fmt.Sprintf("%d", i) { 1780 continue 1781 } 1782 podReady := podutil.IsPodAvailable(&pod, ds.Spec.MinReadySeconds, metav1.Now()) 1783 if !podReady { 1784 continue 1785 } 1786 readyPods += 1 1787 } 1788 framework.Logf("Number of running nodes: %d, number of updated ready pods: %d in daemonset %s", len(nodeNames), readyPods, ds.Name) 1789 return readyPods == len(nodeNames), nil 1790 }) 1791 framework.ExpectNoError(err, "error waiting for daemon pods to be ready") 1792 1793 // assert that the HTTP requests success rate is above the acceptable threshold after this rolling update 1794 currentTotalRequests := atomic.LoadUint64(&totalRequests) 1795 currentNetworkErrors := atomic.LoadUint64(&networkErrors) 1796 currentHttpErrors := atomic.LoadUint64(&httpErrors) 1797 1798 partialTotalRequests := currentTotalRequests - previousTotalRequests 1799 partialNetworkErrors := currentNetworkErrors - previousNetworkErrors 1800 partialHttpErrors := currentHttpErrors - previousHttpErrors 1801 partialSuccessRate := (float64(partialTotalRequests) - float64(partialNetworkErrors+partialHttpErrors)) / float64(partialTotalRequests) 1802 1803 framework.Logf("Load Balancer total HTTP requests: %d", partialTotalRequests) 1804 framework.Logf("Network errors: %d", partialNetworkErrors) 1805 framework.Logf("HTTP errors: %d", partialHttpErrors) 1806 framework.Logf("Success rate: %.2f%%", partialSuccessRate*100) 1807 if partialSuccessRate < minSuccessRate { 1808 framework.Failf("Encountered too many errors when doing HTTP requests to the load balancer address. Success rate is %.2f%%, and the minimum allowed threshold is %.2f%%.", partialSuccessRate*100, minSuccessRate*100) 1809 } 1810 1811 previousTotalRequests = currentTotalRequests 1812 previousNetworkErrors = currentNetworkErrors 1813 previousHttpErrors = currentHttpErrors 1814 } 1815 1816 // assert that the load balancer address is still reachable after the rolling updates are finished 1817 e2eservice.TestReachableHTTP(ctx, lbNameOrAddress, svcPort, timeout) 1818 }