google.golang.org/grpc@v1.74.2/balancer/ringhash/ringhash_e2e_test.go (about) 1 /* 2 * 3 * Copyright 2022 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 package ringhash_test 20 21 import ( 22 "context" 23 "errors" 24 "fmt" 25 "math" 26 rand "math/rand/v2" 27 "net" 28 "slices" 29 "strconv" 30 "sync" 31 "testing" 32 "time" 33 34 "github.com/google/go-cmp/cmp" 35 "github.com/google/go-cmp/cmp/cmpopts" 36 "github.com/google/uuid" 37 "google.golang.org/grpc" 38 "google.golang.org/grpc/backoff" 39 "google.golang.org/grpc/codes" 40 "google.golang.org/grpc/connectivity" 41 "google.golang.org/grpc/credentials/insecure" 42 "google.golang.org/grpc/internal" 43 "google.golang.org/grpc/internal/envconfig" 44 "google.golang.org/grpc/internal/grpctest" 45 iringhash "google.golang.org/grpc/internal/ringhash" 46 "google.golang.org/grpc/internal/stubserver" 47 "google.golang.org/grpc/internal/testutils" 48 "google.golang.org/grpc/internal/testutils/xds/e2e" 49 "google.golang.org/grpc/metadata" 50 "google.golang.org/grpc/peer" 51 "google.golang.org/grpc/resolver" 52 "google.golang.org/grpc/resolver/manual" 53 "google.golang.org/grpc/status" 54 55 v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" 56 v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 57 v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3" 58 v3listenerpb 
"github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
	v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3"
	v3ringhashpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/ring_hash/v3"
	v3matcherpb "github.com/envoyproxy/go-control-plane/envoy/type/matcher/v3"
	testgrpc "google.golang.org/grpc/interop/grpc_testing"
	testpb "google.golang.org/grpc/interop/grpc_testing"
	"google.golang.org/protobuf/types/known/wrapperspb"

	_ "google.golang.org/grpc/xds" // Blank import to register the xDS resolver and balancers.
)

// s wraps grpctest.Tester so that all test methods defined on it run with the
// standard grpctest setup and teardown.
type s struct {
	grpctest.Tester
}

func Test(t *testing.T) {
	grpctest.RunSubTests(t, s{})
}

const (
	defaultTestTimeout      = 10 * time.Second
	defaultTestShortTimeout = 10 * time.Millisecond

	errorTolerance = .05 // For tests that rely on statistical significance.

	virtualHostName = "test.server"

	// minRingSize is the minimum ring size to use when testing randomly a
	// backend for each request. It lowers the skew that may occur from
	// an imbalanced ring.
	minRingSize = 10000
)

// fastConnectParams disables connection attempts backoffs and lowers delays.
// This speeds up tests that rely on subchannel to move to transient failure.
var fastConnectParams = grpc.ConnectParams{
	Backoff: backoff.Config{
		BaseDelay: 10 * time.Millisecond,
	},
	MinConnectTimeout: 100 * time.Millisecond,
}

// Tests the case where the ring contains a single subConn, and verifies that
// when the server goes down, the LB policy on the client automatically
// reconnects until the subChannel moves out of TRANSIENT_FAILURE.
func (s) TestRingHash_ReconnectToMoveOutOfTransientFailure(t *testing.T) {
	// Create a restartable listener to simulate server being down.
	l, err := testutils.LocalTCPListener()
	if err != nil {
		t.Fatalf("testutils.LocalTCPListener() failed: %v", err)
	}
	lis := testutils.NewRestartableListener(l)
	srv := stubserver.StartTestService(t, &stubserver.StubServer{
		Listener:   lis,
		EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) { return &testpb.Empty{}, nil },
	})
	defer srv.Stop()

	// Create a clientConn with a manual resolver (which is used to push the
	// address of the test backend), and a default service config pointing to
	// the use of the ring_hash_experimental LB policy.
	const ringHashServiceConfig = `{"loadBalancingConfig": [{"ring_hash_experimental":{}}]}`
	r := manual.NewBuilderWithScheme("whatever")
	dopts := []grpc.DialOption{
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithResolvers(r),
		grpc.WithDefaultServiceConfig(ringHashServiceConfig),
		grpc.WithConnectParams(fastConnectParams),
	}
	cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...)
	if err != nil {
		t.Fatalf("Failed to dial local test server: %v", err)
	}
	defer cc.Close()

	// Push the address of the test backend through the manual resolver.
	r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: lis.Addr().String()}}})

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	// Attach a fixed xDS request hash (0) to the context; the ring_hash policy
	// uses it to pick the (single) entry in the ring.
	ctx = iringhash.SetXDSRequestHash(ctx, 0)
	defer cancel()
	client := testgrpc.NewTestServiceClient(cc)
	if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
		t.Fatalf("rpc EmptyCall() failed: %v", err)
	}

	// Stopping the server listener will close the transport on the client,
	// which will lead to the channel eventually moving to IDLE. The ring_hash
	// LB policy is not expected to reconnect by itself at this point.
	lis.Stop()

	testutils.AwaitState(ctx, t, cc, connectivity.Idle)

	// Make an RPC to get the ring_hash LB policy to reconnect and thereby move
	// to TRANSIENT_FAILURE upon connection failure. The result of this RPC is
	// intentionally ignored; it only exists to trigger a connection attempt.
	client.EmptyCall(ctx, &testpb.Empty{})

	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)

	// An RPC at this point is expected to fail.
	if _, err = client.EmptyCall(ctx, &testpb.Empty{}); err == nil {
		t.Fatal("EmptyCall RPC succeeded when the channel is in TRANSIENT_FAILURE")
	}

	// Restart the server listener. The ring_hash LB policy is expected to
	// attempt to reconnect on its own and come out of TRANSIENT_FAILURE, even
	// without an RPC attempt.
	lis.Restart()
	testutils.AwaitState(ctx, t, cc, connectivity.Ready)

	// An RPC at this point is expected to succeed.
	if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
		t.Fatalf("rpc EmptyCall() failed: %v", err)
	}
}

// startTestServiceBackends starts num stub servers. It returns the list of
// stubservers. Servers are closed when the test is stopped.
func startTestServiceBackends(t *testing.T, num int) []*stubserver.StubServer {
	t.Helper()

	servers := make([]*stubserver.StubServer, 0, num)
	for i := 0; i < num; i++ {
		server := stubserver.StartTestService(t, nil)
		t.Cleanup(server.Stop)
		servers = append(servers, server)
	}
	return servers
}

// backendAddrs returns a list of address strings for the given stubservers.
func backendAddrs(servers []*stubserver.StubServer) []string {
	addrs := make([]string, 0, len(servers))
	for _, s := range servers {
		addrs = append(addrs, s.Address)
	}
	return addrs
}
199 func backendOptions(t *testing.T, serverAddrs []string) []e2e.BackendOptions { 200 t.Helper() 201 backendAddrs := [][]string{} 202 for _, addr := range serverAddrs { 203 backendAddrs = append(backendAddrs, []string{addr}) 204 } 205 return backendOptionsForEndpointsWithMultipleAddrs(t, backendAddrs) 206 } 207 208 // backendOptions returns a slice of e2e.BackendOptions for the given server 209 // addresses. Each endpoint can have multiple addresses. 210 func backendOptionsForEndpointsWithMultipleAddrs(t *testing.T, backendAddrs [][]string) []e2e.BackendOptions { 211 t.Helper() 212 213 var backendOpts []e2e.BackendOptions 214 for _, backend := range backendAddrs { 215 ports := []uint32{} 216 for _, addr := range backend { 217 ports = append(ports, testutils.ParsePort(t, addr)) 218 } 219 backendOpts = append(backendOpts, e2e.BackendOptions{Ports: ports}) 220 } 221 return backendOpts 222 } 223 224 // channelIDHashRoute returns a RouteConfiguration with a hash policy that 225 // hashes based on the channel ID. 226 func channelIDHashRoute(routeName, virtualHostDomain, clusterName string) *v3routepb.RouteConfiguration { 227 route := e2e.DefaultRouteConfig(routeName, virtualHostDomain, clusterName) 228 hashPolicy := v3routepb.RouteAction_HashPolicy{ 229 PolicySpecifier: &v3routepb.RouteAction_HashPolicy_FilterState_{ 230 FilterState: &v3routepb.RouteAction_HashPolicy_FilterState{ 231 Key: "io.grpc.channel_id", 232 }, 233 }, 234 } 235 action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route) 236 action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&hashPolicy} 237 return route 238 } 239 240 // checkRPCSendOK sends num RPCs to the client. It returns a map of backend 241 // addresses as keys and number of RPCs sent to this address as value. Abort the 242 // test if any RPC fails. 
// checkRPCSendOK sends num RPCs to the client. It returns a map of backend
// addresses as keys and number of RPCs sent to this address as value. Aborts
// the test if any RPC fails.
func checkRPCSendOK(ctx context.Context, t *testing.T, client testgrpc.TestServiceClient, num int) map[string]int {
	t.Helper()

	backendCount := make(map[string]int)
	for i := 0; i < num; i++ {
		var remote peer.Peer
		// grpc.Peer captures the address of the backend that served the RPC.
		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil {
			t.Fatalf("rpc EmptyCall() failed: %v", err)
		}
		backendCount[remote.Addr.String()]++
	}
	return backendCount
}

// makeUnreachableBackends returns a slice of addresses of backends that close
// connections as soon as they are established. Useful to simulate servers that
// are unreachable.
func makeUnreachableBackends(t *testing.T, num int) []string {
	t.Helper()

	addrs := make([]string, 0, num)
	for i := 0; i < num; i++ {
		l, err := testutils.LocalTCPListener()
		if err != nil {
			t.Fatalf("testutils.LocalTCPListener() failed: %v", err)
		}
		lis := testutils.NewRestartableListener(l)
		addrs = append(addrs, lis.Addr().String())

		// It is enough to fail the first connection attempt to put the subchannel
		// in TRANSIENT_FAILURE.
		go func() { lis.Accept() }()

		// We don't close these listeners here, to make sure ports are
		// not reused across them, and across tests.
		lis.Stop()
		t.Cleanup(func() { lis.Close() })
	}
	return addrs
}

// setupManagementServerAndResolver sets up an xDS management server, creates
// bootstrap configuration pointing to that server and creates an xDS resolver
// using that configuration.
//
// Registers a cleanup function on t to stop the management server.
//
// Returns the management server, node ID and the xDS resolver builder.
func setupManagementServerAndResolver(t *testing.T) (*e2e.ManagementServer, string, resolver.Builder) {
	t.Helper()

	// Start an xDS management server.
	xdsServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})

	// Create bootstrap configuration pointing to the above management server.
	nodeID := uuid.New().String()
	bc := e2e.DefaultBootstrapContents(t, nodeID, xdsServer.Address)

	// Create an xDS resolver with the above bootstrap configuration.
	if internal.NewXDSResolverWithConfigForTesting == nil {
		t.Fatalf("internal.NewXDSResolverWithConfigForTesting is nil")
	}
	r, err := internal.NewXDSResolverWithConfigForTesting.(func([]byte) (resolver.Builder, error))(bc)
	if err != nil {
		t.Fatalf("Failed to create xDS resolver for testing: %v", err)
	}

	return xdsServer, nodeID, r
}

// xdsUpdateOpts returns an e2e.UpdateOptions for the given node ID with the given xDS resources.
func xdsUpdateOpts(nodeID string, endpoints *v3endpointpb.ClusterLoadAssignment, cluster *v3clusterpb.Cluster, route *v3routepb.RouteConfiguration, listener *v3listenerpb.Listener) e2e.UpdateOptions {
	return e2e.UpdateOptions{
		NodeID:    nodeID,
		Endpoints: []*v3endpointpb.ClusterLoadAssignment{endpoints},
		Clusters:  []*v3clusterpb.Cluster{cluster},
		Routes:    []*v3routepb.RouteConfiguration{route},
		Listeners: []*v3listenerpb.Listener{listener},
	}
}

// Tests that when an aggregate cluster is configured with ring hash policy, and
// the first cluster is in transient failure, all RPCs are sent to the second
// cluster using the ring hash policy.
327 func (s) TestRingHash_AggregateClusterFallBackFromRingHashAtStartup(t *testing.T) { 328 addrs := backendAddrs(startTestServiceBackends(t, 2)) 329 330 const primaryClusterName = "new_cluster_1" 331 const primaryServiceName = "new_eds_service_1" 332 const secondaryClusterName = "new_cluster_2" 333 const secondaryServiceName = "new_eds_service_2" 334 const clusterName = "aggregate_cluster" 335 336 ep1 := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 337 ClusterName: primaryServiceName, 338 Localities: []e2e.LocalityOptions{{ 339 Name: "locality0", 340 Weight: 1, 341 Backends: backendOptions(t, makeUnreachableBackends(t, 2)), 342 }}, 343 }) 344 ep2 := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 345 ClusterName: secondaryServiceName, 346 Localities: []e2e.LocalityOptions{{ 347 Name: "locality0", 348 Weight: 1, 349 Backends: backendOptions(t, addrs), 350 }}, 351 }) 352 primaryCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 353 ClusterName: primaryClusterName, 354 ServiceName: primaryServiceName, 355 }) 356 secondaryCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 357 ClusterName: secondaryClusterName, 358 ServiceName: secondaryServiceName, 359 }) 360 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 361 ClusterName: clusterName, 362 Type: e2e.ClusterTypeAggregate, 363 // TODO: when "A75: xDS Aggregate Cluster Behavior Fixes" is implemented, the 364 // policy will have to be set on the child clusters. 
365 Policy: e2e.LoadBalancingPolicyRingHash, 366 ChildNames: []string{primaryClusterName, secondaryClusterName}, 367 }) 368 route := channelIDHashRoute("new_route", virtualHostName, clusterName) 369 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 370 371 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 372 defer cancel() 373 374 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 375 updateOpts := e2e.UpdateOptions{ 376 NodeID: nodeID, 377 Endpoints: []*v3endpointpb.ClusterLoadAssignment{ep1, ep2}, 378 Clusters: []*v3clusterpb.Cluster{cluster, primaryCluster, secondaryCluster}, 379 Routes: []*v3routepb.RouteConfiguration{route}, 380 Listeners: []*v3listenerpb.Listener{listener}, 381 } 382 if err := xdsServer.Update(ctx, updateOpts); err != nil { 383 t.Fatalf("Failed to update xDS resources: %v", err) 384 } 385 386 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 387 if err != nil { 388 t.Fatalf("Failed to create client: %s", err) 389 } 390 defer conn.Close() 391 client := testgrpc.NewTestServiceClient(conn) 392 393 const numRPCs = 100 394 gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs) 395 396 // Since this is using ring hash with the channel ID as the key, all RPCs 397 // are routed to the same backend of the secondary locality. 
398 if len(gotPerBackend) != 1 { 399 t.Errorf("Got RPCs routed to %v backends, want %v", len(gotPerBackend), 1) 400 } 401 402 var backend string 403 var got int 404 for backend, got = range gotPerBackend { 405 } 406 if !slices.Contains(addrs, backend) { 407 t.Errorf("Got RPCs routed to an unexpected backend: %v, want one of %v", backend, addrs) 408 } 409 if got != numRPCs { 410 t.Errorf("Got %v RPCs routed to a backend, want %v", got, 100) 411 } 412 } 413 414 func replaceDNSResolver(t *testing.T) *manual.Resolver { 415 mr := manual.NewBuilderWithScheme("dns") 416 417 dnsResolverBuilder := resolver.Get("dns") 418 resolver.Register(mr) 419 420 t.Cleanup(func() { resolver.Register(dnsResolverBuilder) }) 421 return mr 422 } 423 424 // Tests that when an aggregate cluster is configured with ring hash policy, and 425 // the first is an EDS cluster in transient failure, and the fallback is a 426 // logical DNS cluster, all RPCs are sent to the second cluster using the ring 427 // hash policy. 428 func (s) TestRingHash_AggregateClusterFallBackFromRingHashToLogicalDnsAtStartup(t *testing.T) { 429 const edsClusterName = "eds_cluster" 430 const logicalDNSClusterName = "logical_dns_cluster" 431 const clusterName = "aggregate_cluster" 432 433 backends := backendAddrs(startTestServiceBackends(t, 1)) 434 435 endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 436 ClusterName: edsClusterName, 437 Localities: []e2e.LocalityOptions{{ 438 Name: "locality0", 439 Weight: 1, 440 Backends: backendOptions(t, makeUnreachableBackends(t, 1)), 441 Priority: 0, 442 }}, 443 }) 444 edsCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 445 ClusterName: edsClusterName, 446 ServiceName: edsClusterName, 447 }) 448 449 logicalDNSCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 450 Type: e2e.ClusterTypeLogicalDNS, 451 ClusterName: logicalDNSClusterName, 452 // The DNS values are not used because we fake DNS later on, but they 453 // are required to be present for 
the resource to be valid. 454 DNSHostName: "server.example.com", 455 DNSPort: 443, 456 }) 457 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 458 ClusterName: clusterName, 459 Type: e2e.ClusterTypeAggregate, 460 // TODO: when "A75: xDS Aggregate Cluster Behavior Fixes" is merged, the 461 // policy will have to be set on the child clusters. 462 Policy: e2e.LoadBalancingPolicyRingHash, 463 ChildNames: []string{edsClusterName, logicalDNSClusterName}, 464 }) 465 route := channelIDHashRoute("new_route", virtualHostName, clusterName) 466 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 467 468 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 469 defer cancel() 470 471 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 472 updateOpts := e2e.UpdateOptions{ 473 NodeID: nodeID, 474 Endpoints: []*v3endpointpb.ClusterLoadAssignment{endpoints}, 475 Clusters: []*v3clusterpb.Cluster{cluster, edsCluster, logicalDNSCluster}, 476 Routes: []*v3routepb.RouteConfiguration{route}, 477 Listeners: []*v3listenerpb.Listener{listener}, 478 } 479 480 dnsR := replaceDNSResolver(t) 481 dnsR.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: backends[0]}}}) 482 483 if err := xdsServer.Update(ctx, updateOpts); err != nil { 484 t.Fatalf("Failed to update xDS resources: %v", err) 485 } 486 487 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 488 if err != nil { 489 t.Fatalf("Failed to create client: %s", err) 490 } 491 defer conn.Close() 492 client := testgrpc.NewTestServiceClient(conn) 493 494 gotPerBackend := checkRPCSendOK(ctx, t, client, 1) 495 var got string 496 for got = range gotPerBackend { 497 } 498 if want := backends[0]; got != want { 499 t.Errorf("Got RPCs routed to an unexpected got: %v, want %v", got, want) 500 } 501 } 502 503 // Tests that when an aggregate cluster is configured with ring hash policy, 
and 504 // it's first child is in transient failure, and the fallback is a logical DNS, 505 // the later recovers from transient failure when its backend becomes available. 506 func (s) TestRingHash_AggregateClusterFallBackFromRingHashToLogicalDnsAtStartupNoFailedRPCs(t *testing.T) { 507 const edsClusterName = "eds_cluster" 508 const logicalDNSClusterName = "logical_dns_cluster" 509 const clusterName = "aggregate_cluster" 510 511 backends := backendAddrs(startTestServiceBackends(t, 1)) 512 513 endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 514 ClusterName: edsClusterName, 515 Localities: []e2e.LocalityOptions{{ 516 Name: "locality0", 517 Weight: 1, 518 Backends: backendOptions(t, makeUnreachableBackends(t, 1)), 519 Priority: 0, 520 }}, 521 }) 522 edsCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 523 ClusterName: edsClusterName, 524 ServiceName: edsClusterName, 525 }) 526 527 logicalDNSCluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 528 Type: e2e.ClusterTypeLogicalDNS, 529 ClusterName: logicalDNSClusterName, 530 // The DNS values are not used because we fake DNS later on, but they 531 // are required to be present for the resource to be valid. 532 DNSHostName: "server.example.com", 533 DNSPort: 443, 534 }) 535 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 536 ClusterName: clusterName, 537 Type: e2e.ClusterTypeAggregate, 538 // TODO: when "A75: xDS Aggregate Cluster Behavior Fixes" is merged, the 539 // policy will have to be set on the child clusters. 
540 Policy: e2e.LoadBalancingPolicyRingHash, 541 ChildNames: []string{edsClusterName, logicalDNSClusterName}, 542 }) 543 route := channelIDHashRoute("new_route", virtualHostName, clusterName) 544 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 545 546 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 547 defer cancel() 548 549 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 550 updateOpts := e2e.UpdateOptions{ 551 NodeID: nodeID, 552 Endpoints: []*v3endpointpb.ClusterLoadAssignment{endpoints}, 553 Clusters: []*v3clusterpb.Cluster{cluster, edsCluster, logicalDNSCluster}, 554 Routes: []*v3routepb.RouteConfiguration{route}, 555 Listeners: []*v3listenerpb.Listener{listener}, 556 } 557 558 dnsR := replaceDNSResolver(t) 559 dnsR.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: backends[0]}}}) 560 561 if err := xdsServer.Update(ctx, updateOpts); err != nil { 562 t.Fatalf("Failed to update xDS resources: %v", err) 563 } 564 565 dialer := testutils.NewBlockingDialer() 566 cp := grpc.ConnectParams{ 567 // Increase backoff time, so that subconns stay in TRANSIENT_FAILURE 568 // for long enough to trigger potential problems. 569 Backoff: backoff.Config{ 570 BaseDelay: defaultTestTimeout, 571 }, 572 MinConnectTimeout: 0, 573 } 574 dopts := []grpc.DialOption{ 575 grpc.WithResolvers(xdsResolver), 576 grpc.WithTransportCredentials(insecure.NewCredentials()), 577 grpc.WithContextDialer(dialer.DialContext), 578 grpc.WithConnectParams(cp)} 579 conn, err := grpc.NewClient("xds:///test.server", dopts...) 
580 if err != nil { 581 t.Fatalf("Failed to create client: %s", err) 582 } 583 defer conn.Close() 584 client := testgrpc.NewTestServiceClient(conn) 585 586 hold := dialer.Hold(backends[0]) 587 588 errCh := make(chan error, 2) 589 go func() { 590 if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil { 591 errCh <- fmt.Errorf("first rpc UnaryCall() failed: %v", err) 592 return 593 } 594 errCh <- nil 595 }() 596 597 testutils.AwaitState(ctx, t, conn, connectivity.Connecting) 598 599 go func() { 600 // Start a second RPC at this point, which should be queued as well. 601 // This will fail if the priority policy fails to update the picker to 602 // point to the LOGICAL_DNS child; if it leaves it pointing to the EDS 603 // priority 1, then the RPC will fail, because all subchannels are in 604 // transient failure. 605 // 606 // Note that sending only the first RPC does not catch this case, 607 // because if the priority policy fails to update the picker, then the 608 // pick for the first RPC will not be retried. 609 if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil { 610 errCh <- fmt.Errorf("second UnaryCall() failed: %v", err) 611 return 612 } 613 errCh <- nil 614 }() 615 616 // Wait for a connection attempt to backends[0]. 617 if !hold.Wait(ctx) { 618 t.Fatalf("Timeout while waiting for a connection attempt to %s", backends[0]) 619 } 620 // Allow the connection attempts to complete. 621 hold.Resume() 622 623 // RPCs should complete successfully. 624 for range []int{0, 1} { 625 select { 626 case err := <-errCh: 627 if err != nil { 628 t.Errorf("Expected 2 rpc to succeed, but at least one failed: %v", err) 629 } 630 case <-ctx.Done(): 631 t.Fatalf("Timed out waiting for RPCs to complete") 632 } 633 } 634 } 635 636 // endpointResource creates a ClusterLoadAssignment containing a single locality 637 // with the given addresses. 
638 func endpointResource(t *testing.T, clusterName string, addrs []string) *v3endpointpb.ClusterLoadAssignment { 639 t.Helper() 640 backendAddrs := [][]string{} 641 for _, addr := range addrs { 642 backendAddrs = append(backendAddrs, []string{addr}) 643 } 644 return endpointResourceForBackendsWithMultipleAddrs(t, clusterName, backendAddrs) 645 } 646 647 // endpointResourceForBackendsWithMultipleAddrs creates a ClusterLoadAssignment 648 // containing a single locality with the given addresses. 649 func endpointResourceForBackendsWithMultipleAddrs(t *testing.T, clusterName string, addrs [][]string) *v3endpointpb.ClusterLoadAssignment { 650 t.Helper() 651 652 // We must set the host name socket address in EDS, as the ring hash policy 653 // uses it to construct the ring. 654 host, _, err := net.SplitHostPort(addrs[0][0]) 655 if err != nil { 656 t.Fatalf("Failed to split host and port from stubserver: %v", err) 657 } 658 659 return e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 660 ClusterName: clusterName, 661 Host: host, 662 Localities: []e2e.LocalityOptions{{ 663 Backends: backendOptionsForEndpointsWithMultipleAddrs(t, addrs), 664 Weight: 1, 665 }}, 666 }) 667 } 668 669 // Tests that ring hash policy that hashes using channel id ensures all RPCs to 670 // go 1 particular backend. 
// Tests that ring hash policy that hashes using channel id ensures all RPCs to
// go 1 particular backend.
func (s) TestRingHash_ChannelIdHashing(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 4))

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)

	const clusterName = "cluster"
	endpoints := endpointResource(t, clusterName, backends)
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	// The route hashes on the channel ID, so every RPC on the same channel
	// computes the same hash and should land on the same backend.
	route := channelIDHashRoute("new_route", virtualHostName, clusterName)
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	const numRPCs = 100
	received := checkRPCSendOK(ctx, t, client, numRPCs)
	if len(received) != 1 {
		t.Errorf("Got RPCs routed to %v backends, want %v", len(received), 1)
	}
	// Extract the single count from the one-entry map.
	var got int
	for _, got = range received {
	}
	if got != numRPCs {
		t.Errorf("Got %v RPCs routed to a backend, want %v", got, numRPCs)
	}
}
715 func headerHashRoute(routeName, virtualHostName, clusterName, header string) *v3routepb.RouteConfiguration { 716 route := e2e.DefaultRouteConfig(routeName, virtualHostName, clusterName) 717 hashPolicy := v3routepb.RouteAction_HashPolicy{ 718 PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Header_{ 719 Header: &v3routepb.RouteAction_HashPolicy_Header{ 720 HeaderName: header, 721 }, 722 }, 723 } 724 action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route) 725 action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&hashPolicy} 726 return route 727 } 728 729 // Tests that ring hash policy that hashes using a header value can send RPCs 730 // to specific backends based on their hash. 731 func (s) TestRingHash_HeaderHashing(t *testing.T) { 732 backends := backendAddrs(startTestServiceBackends(t, 4)) 733 734 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 735 736 const clusterName = "cluster" 737 endpoints := endpointResource(t, clusterName, backends) 738 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 739 ClusterName: clusterName, 740 ServiceName: clusterName, 741 Policy: e2e.LoadBalancingPolicyRingHash, 742 }) 743 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 744 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 745 746 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 747 defer cancel() 748 749 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 750 t.Fatalf("Failed to update xDS resources: %v", err) 751 } 752 753 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 754 if err != nil { 755 t.Fatalf("Failed to create client: %s", err) 756 } 757 defer conn.Close() 758 client := testgrpc.NewTestServiceClient(conn) 759 760 // Note each type of RPC contains a header value that will always 
be hashed 761 // to a specific backend as the header value matches the value used to 762 // create the entry in the ring. 763 for _, backend := range backends { 764 ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", backend+"_0")) 765 numRPCs := 10 766 reqPerBackend := checkRPCSendOK(ctx, t, client, numRPCs) 767 if reqPerBackend[backend] != numRPCs { 768 t.Errorf("Got RPC routed to addresses %v, want all RPCs routed to %v", reqPerBackend, backend) 769 } 770 } 771 } 772 773 // Tests that ring hash policy that hashes using a header value and regex 774 // rewrite to aggregate RPCs to 1 backend. 775 func (s) TestRingHash_HeaderHashingWithRegexRewrite(t *testing.T) { 776 backends := backendAddrs(startTestServiceBackends(t, 4)) 777 778 clusterName := "cluster" 779 endpoints := endpointResource(t, clusterName, backends) 780 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 781 ClusterName: clusterName, 782 ServiceName: clusterName, 783 Policy: e2e.LoadBalancingPolicyRingHash, 784 }) 785 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 786 action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route) 787 action.Route.HashPolicy[0].GetHeader().RegexRewrite = &v3matcherpb.RegexMatchAndSubstitute{ 788 Pattern: &v3matcherpb.RegexMatcher{ 789 EngineType: &v3matcherpb.RegexMatcher_GoogleRe2{}, 790 Regex: "[0-9]+", 791 }, 792 Substitution: "foo", 793 } 794 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 795 796 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 797 defer cancel() 798 799 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 800 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 801 t.Fatalf("Failed to update xDS resources: %v", err) 802 } 803 804 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), 
grpc.WithTransportCredentials(insecure.NewCredentials())) 805 if err != nil { 806 t.Fatalf("Failed to create client: %s", err) 807 } 808 defer conn.Close() 809 client := testgrpc.NewTestServiceClient(conn) 810 811 // Note each type of RPC contains a header value that would always be hashed 812 // to a specific backend as the header value matches the value used to 813 // create the entry in the ring. However, the regex rewrites all numbers to 814 // "foo", and header values only differ by numbers, so they all end up 815 // hashing to the same value. 816 gotPerBackend := make(map[string]int) 817 for _, backend := range backends { 818 ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", backend+"_0")) 819 res := checkRPCSendOK(ctx, t, client, 100) 820 for addr, count := range res { 821 gotPerBackend[addr] += count 822 } 823 } 824 if want := 1; len(gotPerBackend) != want { 825 t.Errorf("Got RPCs routed to %v backends, want %v", len(gotPerBackend), want) 826 } 827 var got int 828 for _, got = range gotPerBackend { 829 } 830 if want := 400; got != want { 831 t.Errorf("Got %v RPCs routed to a backend, want %v", got, want) 832 } 833 } 834 835 // computeIdealNumberOfRPCs computes the ideal number of RPCs to send so that 836 // we can observe an event happening with probability p, and the result will 837 // have value p with the given error tolerance. 838 // 839 // See https://github.com/grpc/grpc/blob/4f6e13bdda9e8c26d6027af97db4b368ca2b3069/test/cpp/end2end/xds/xds_end2end_test_lib.h#L941 840 // for an explanation of the formula. 841 func computeIdealNumberOfRPCs(t *testing.T, p, errorTolerance float64) int { 842 if p < 0 || p > 1 { 843 t.Fatal("p must be in (0, 1)") 844 } 845 numRPCs := math.Ceil(p * (1 - p) * 5. * 5. / errorTolerance / errorTolerance) 846 return int(numRPCs + 1000.) // add 1k as a buffer to avoid flakiness. 
847 } 848 849 // setRingHashLBPolicyWithHighMinRingSize sets the ring hash policy with a high 850 // minimum ring size to ensure that the ring is large enough to distribute 851 // requests more uniformly across endpoints when a random hash is used. 852 func setRingHashLBPolicyWithHighMinRingSize(t *testing.T, cluster *v3clusterpb.Cluster) { 853 testutils.SetEnvConfig(t, &envconfig.RingHashCap, minRingSize) 854 855 // Increasing min ring size for random distribution. 856 config := testutils.MarshalAny(t, &v3ringhashpb.RingHash{ 857 HashFunction: v3ringhashpb.RingHash_XX_HASH, 858 MinimumRingSize: &wrapperspb.UInt64Value{Value: minRingSize}, 859 }) 860 cluster.LoadBalancingPolicy = &v3clusterpb.LoadBalancingPolicy{ 861 Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{{ 862 TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ 863 Name: "envoy.load_balancing_policies.ring_hash", 864 TypedConfig: config, 865 }, 866 }}, 867 } 868 } 869 870 // Tests that ring hash policy that hashes using a random value. 
func (s) TestRingHash_NoHashPolicy(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 2))
	// Enough RPCs to observe a 50/50 split within errorTolerance.
	numRPCs := computeIdealNumberOfRPCs(t, .5, errorTolerance)

	const clusterName = "cluster"
	endpoints := endpointResource(t, clusterName, backends)
	// No hash policy is configured on the route, so the picker falls back to a
	// random hash per RPC; a large ring makes the distribution near-uniform.
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
	})
	setRingHashLBPolicyWithHighMinRingSize(t, cluster)
	route := e2e.DefaultRouteConfig("new_route", virtualHostName, clusterName)
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Push listener/route/cluster/endpoint resources to the management server.
	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Send a large number of RPCs and check that they are distributed randomly.
	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
	for _, backend := range backends {
		got := float64(gotPerBackend[backend]) / float64(numRPCs)
		want := .5
		if !cmp.Equal(got, want, cmpopts.EquateApprox(0, errorTolerance)) {
			t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backend, got, want, errorTolerance)
		}
	}
}

// Tests that we observe endpoint weights.
func (s) TestRingHash_EndpointWeights(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 3))

	const clusterName = "cluster"
	// Third backend gets weight 2; the others default (set explicitly below).
	backendOpts := []e2e.BackendOptions{
		{Ports: []uint32{testutils.ParsePort(t, backends[0])}},
		{Ports: []uint32{testutils.ParsePort(t, backends[1])}},
		{Ports: []uint32{testutils.ParsePort(t, backends[2])}, Weight: 2},
	}

	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: clusterName,
		Localities: []e2e.LocalityOptions{{
			Backends: backendOpts,
			Weight:   1,
		}},
	})
	// Set per-endpoint LB weights explicitly on the EDS resource: 1, 1 and 2,
	// so backend 2 should receive roughly half of the traffic.
	endpoints.Endpoints[0].LbEndpoints[0].LoadBalancingWeight = wrapperspb.UInt32(uint32(1))
	endpoints.Endpoints[0].LbEndpoints[1].LoadBalancingWeight = wrapperspb.UInt32(uint32(1))
	endpoints.Endpoints[0].LbEndpoints[2].LoadBalancingWeight = wrapperspb.UInt32(uint32(2))
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
	})
	// Increasing min ring size for random distribution.
	setRingHashLBPolicyWithHighMinRingSize(t, cluster)
	route := e2e.DefaultRouteConfig("new_route", virtualHostName, clusterName)
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Send a large number of RPCs and check that they are distributed randomly.
	numRPCs := computeIdealNumberOfRPCs(t, .25, errorTolerance)
	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)

	// Expected split: weights 1:1:2 => fractions .25, .25 and .50.
	got := float64(gotPerBackend[backends[0]]) / float64(numRPCs)
	want := .25
	if !cmp.Equal(got, want, cmpopts.EquateApprox(0, errorTolerance)) {
		t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backends[0], got, want, errorTolerance)
	}
	got = float64(gotPerBackend[backends[1]]) / float64(numRPCs)
	if !cmp.Equal(got, want, cmpopts.EquateApprox(0, errorTolerance)) {
		t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backends[1], got, want, errorTolerance)
	}
	got = float64(gotPerBackend[backends[2]]) / float64(numRPCs)
	want = .50
	if !cmp.Equal(got, want, cmpopts.EquateApprox(0, errorTolerance)) {
		t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backends[2], got, want, errorTolerance)
	}
}

// Tests that ring hash policy evaluation will continue past the terminal hash
// policy if
// no results are produced yet.
func (s) TestRingHash_ContinuesPastTerminalPolicyThatDoesNotProduceResult(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 2))

	const clusterName = "cluster"
	endpoints := endpointResource(t, clusterName, backends)
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})

	route := e2e.DefaultRouteConfig("new_route", "test.server", clusterName)

	// Even though this hash policy is terminal, since it produces no result, we
	// continue past it to find a policy that produces results.
	hashPolicy := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Header_{
			Header: &v3routepb.RouteAction_HashPolicy_Header{
				HeaderName: "header_not_present",
			},
		},
		Terminal: true,
	}
	// Second policy hashes on a header the RPCs do carry.
	hashPolicy2 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Header_{
			Header: &v3routepb.RouteAction_HashPolicy_Header{
				HeaderName: "address_hash",
			},
		},
	}
	action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route)
	action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&hashPolicy, &hashPolicy2}

	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// - The first hash policy does not match because the header is not present.
	// If this hash policy was applied, it would spread the load across
	// backend 0 and 1, since a random hash would be used.
	// - In the second hash policy, each type of RPC contains a header
	// value that always hashes to backend 0, as the header value
	// matches the value used to create the entry in the ring.
	// We verify that the second hash policy is used by checking that all RPCs
	// are being routed to backend 0.
	wantBackend := backends[0]
	ctx = metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", wantBackend+"_0"))
	const numRPCs = 100
	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
	if got := gotPerBackend[wantBackend]; got != numRPCs {
		t.Errorf("Got %v RPCs routed to backend %v, want %v", got, wantBackend, numRPCs)
	}
}

// Tests that a random hash is used when header hashing policy specified a
// header field that the RPC did not have.
func (s) TestRingHash_HashOnHeaderThatIsNotPresent(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 2))
	wantFractionPerBackend := .5
	numRPCs := computeIdealNumberOfRPCs(t, wantFractionPerBackend, errorTolerance)

	const clusterName = "cluster"
	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: clusterName,
		Localities: []e2e.LocalityOptions{{
			Backends: backendOptions(t, backends),
			Weight:   1,
		}},
	})
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
	})
	// Large ring so the random-hash fallback distributes near-uniformly.
	setRingHashLBPolicyWithHighMinRingSize(t, cluster)
	// Hash policy names a header that outgoing RPCs will not carry.
	route := headerHashRoute("new_route", virtualHostName, clusterName, "header_not_present")
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// The first hash policy does not apply because the header is not present in
	// the RPCs that we are about to send. As a result, a random hash should be
	// used instead, resulting in a random request distribution.
	// We verify this by checking that the RPCs are distributed randomly.
	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
	for _, backend := range backends {
		got := float64(gotPerBackend[backend]) / float64(numRPCs)
		if !cmp.Equal(got, wantFractionPerBackend, cmpopts.EquateApprox(0, errorTolerance)) {
			t.Errorf("fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backend, got, wantFractionPerBackend, errorTolerance)
		}
	}
}

// Tests that a random hash is used when only unsupported hash policies are
// configured.
func (s) TestRingHash_UnsupportedHashPolicyDefaultToRandomHashing(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 2))
	wantFractionPerBackend := .5
	numRPCs := computeIdealNumberOfRPCs(t, wantFractionPerBackend, errorTolerance)

	const clusterName = "cluster"
	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: clusterName,
		Localities: []e2e.LocalityOptions{{
			Backends: backendOptions(t, backends),
			Weight:   1,
		}},
	})
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
	})
	setRingHashLBPolicyWithHighMinRingSize(t, cluster)
	route := e2e.DefaultRouteConfig("new_route", "test.server", clusterName)
	// Cookie, connection-properties and query-parameter policies are not
	// supported by the gRPC xDS client; all three should be skipped.
	unsupportedHashPolicy1 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Cookie_{
			Cookie: &v3routepb.RouteAction_HashPolicy_Cookie{Name: "cookie"},
		},
	}
	unsupportedHashPolicy2 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_ConnectionProperties_{
			ConnectionProperties: &v3routepb.RouteAction_HashPolicy_ConnectionProperties{SourceIp: true},
		},
	}
	unsupportedHashPolicy3 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_QueryParameter_{
			QueryParameter: &v3routepb.RouteAction_HashPolicy_QueryParameter{Name: "query_parameter"},
		},
	}
	action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route)
	action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&unsupportedHashPolicy1, &unsupportedHashPolicy2, &unsupportedHashPolicy3}
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Since none of the hash policy are supported, a random hash should be
	// generated for every request.
	// We verify this by checking that the RPCs are distributed randomly.
	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
	for _, backend := range backends {
		got := float64(gotPerBackend[backend]) / float64(numRPCs)
		if !cmp.Equal(got, wantFractionPerBackend, cmpopts.EquateApprox(0, errorTolerance)) {
			t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backend, got, wantFractionPerBackend, errorTolerance)
		}
	}
}

// Tests that unsupported hash policy types are all ignored before a supported
// hash policy.
func (s) TestRingHash_UnsupportedHashPolicyUntilChannelIdHashing(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 2))

	const clusterName = "cluster"
	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: clusterName,
		Localities: []e2e.LocalityOptions{{
			Backends: backendOptions(t, backends),
			Weight:   1,
		}},
	})
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
	})
	setRingHashLBPolicyWithHighMinRingSize(t, cluster)
	route := e2e.DefaultRouteConfig("new_route", "test.server", clusterName)
	// Three unsupported policy types, followed by a supported channel-ID
	// policy; evaluation should skip the first three.
	unsupportedHashPolicy1 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Cookie_{
			Cookie: &v3routepb.RouteAction_HashPolicy_Cookie{Name: "cookie"},
		},
	}
	unsupportedHashPolicy2 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_ConnectionProperties_{
			ConnectionProperties: &v3routepb.RouteAction_HashPolicy_ConnectionProperties{SourceIp: true},
		},
	}
	unsupportedHashPolicy3 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_QueryParameter_{
			QueryParameter: &v3routepb.RouteAction_HashPolicy_QueryParameter{Name: "query_parameter"},
		},
	}
	// Channel-ID hashing pins every RPC on this channel to one ring entry.
	channelIDhashPolicy := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_FilterState_{
			FilterState: &v3routepb.RouteAction_HashPolicy_FilterState{
				Key: "io.grpc.channel_id",
			},
		},
	}
	action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route)
	action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&unsupportedHashPolicy1, &unsupportedHashPolicy2, &unsupportedHashPolicy3, &channelIDhashPolicy}
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Since only unsupported policies are present except for the last one
	// which is using the channel ID hashing policy, all requests should be
	// routed to the same backend.
	const numRPCs = 100
	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
	if len(gotPerBackend) != 1 {
		t.Errorf("Got RPCs routed to %v backends, want 1", len(gotPerBackend))
	}
	// Extract the single backend's count (map has exactly one entry here).
	var got int
	for _, got = range gotPerBackend {
	}
	if got != numRPCs {
		t.Errorf("Got %v RPCs routed to a backend, want %v", got, numRPCs)
	}
}

// Tests that ring hash policy that hashes using a random value can spread RPCs
// across all the backends according to locality weight.
1241 func (s) TestRingHash_RandomHashingDistributionAccordingToLocalityAndEndpointWeight(t *testing.T) { 1242 backends := backendAddrs(startTestServiceBackends(t, 2)) 1243 1244 const clusterName = "cluster" 1245 const locality1Weight = uint32(1) 1246 const endpoint1Weight = uint32(1) 1247 const locality2Weight = uint32(2) 1248 const endpoint2Weight = uint32(2) 1249 endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 1250 ClusterName: clusterName, 1251 Localities: []e2e.LocalityOptions{ 1252 { 1253 Backends: []e2e.BackendOptions{{ 1254 Ports: []uint32{testutils.ParsePort(t, backends[0])}, 1255 Weight: endpoint1Weight, 1256 }}, 1257 Weight: locality1Weight, 1258 }, 1259 { 1260 Backends: []e2e.BackendOptions{{ 1261 Ports: []uint32{testutils.ParsePort(t, backends[1])}, 1262 Weight: endpoint2Weight, 1263 }}, 1264 Weight: locality2Weight, 1265 }, 1266 }, 1267 }) 1268 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1269 ClusterName: clusterName, 1270 ServiceName: clusterName, 1271 }) 1272 setRingHashLBPolicyWithHighMinRingSize(t, cluster) 1273 route := e2e.DefaultRouteConfig("new_route", "test.server", clusterName) 1274 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1275 1276 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1277 defer cancel() 1278 1279 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1280 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1281 t.Fatalf("Failed to update xDS resources: %v", err) 1282 } 1283 1284 conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials())) 1285 if err != nil { 1286 t.Fatalf("Failed to create client: %s", err) 1287 } 1288 defer conn.Close() 1289 client := testgrpc.NewTestServiceClient(conn) 1290 1291 const weight1 = endpoint1Weight * locality1Weight 1292 const weight2 = endpoint2Weight * 
locality2Weight 1293 const wantRPCs1 = float64(weight1) / float64(weight1+weight2) 1294 const wantRPCs2 = float64(weight2) / float64(weight1+weight2) 1295 numRPCs := computeIdealNumberOfRPCs(t, math.Min(wantRPCs1, wantRPCs2), errorTolerance) 1296 1297 // Send a large number of RPCs and check that they are distributed randomly. 1298 gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs) 1299 got := float64(gotPerBackend[backends[0]]) / float64(numRPCs) 1300 if !cmp.Equal(got, wantRPCs1, cmpopts.EquateApprox(0, errorTolerance)) { 1301 t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backends[2], got, wantRPCs1, errorTolerance) 1302 } 1303 got = float64(gotPerBackend[backends[1]]) / float64(numRPCs) 1304 if !cmp.Equal(got, wantRPCs2, cmpopts.EquateApprox(0, errorTolerance)) { 1305 t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backends[2], got, wantRPCs2, errorTolerance) 1306 } 1307 } 1308 1309 // Tests that ring hash policy that hashes using a fixed string ensures all RPCs 1310 // to go 1 particular backend; and that subsequent hashing policies are ignored 1311 // due to the setting of terminal. 
func (s) TestRingHash_FixedHashingTerminalPolicy(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 2))
	const clusterName = "cluster"
	endpoints := endpointResource(t, clusterName, backends)
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})

	route := e2e.DefaultRouteConfig("new_route", "test.server", clusterName)

	// Terminal policy on a header the RPCs always set to the same value.
	hashPolicy := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Header_{
			Header: &v3routepb.RouteAction_HashPolicy_Header{
				HeaderName: "fixed_string",
			},
		},
		Terminal: true,
	}
	// Non-terminal policy on a header set to a fresh random value per RPC; it
	// must be ignored because the first policy is terminal and produces a hash.
	hashPolicy2 := v3routepb.RouteAction_HashPolicy{
		PolicySpecifier: &v3routepb.RouteAction_HashPolicy_Header_{
			Header: &v3routepb.RouteAction_HashPolicy_Header{
				HeaderName: "random_string",
			},
		},
	}
	action := route.VirtualHosts[0].Routes[0].Action.(*v3routepb.Route_Route)
	action.Route.HashPolicy = []*v3routepb.RouteAction_HashPolicy{&hashPolicy, &hashPolicy2}

	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Check that despite the matching random string header, since the fixed
	// string hash policy is terminal, only the fixed string hash policy applies
	// and requests all get routed to the same host.
	gotPerBackend := make(map[string]int)
	const numRPCs = 100
	for i := 0; i < numRPCs; i++ {
		ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs(
			"fixed_string", backends[0]+"_0",
			"random_string", fmt.Sprintf("%d", rand.Int())),
		)
		var remote peer.Peer
		_, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&remote))
		if err != nil {
			t.Fatalf("rpc EmptyCall() failed: %v", err)
		}
		gotPerBackend[remote.Addr.String()]++
	}

	if len(gotPerBackend) != 1 {
		t.Error("Got RPCs routed to multiple backends, want a single backend")
	}
	if got := gotPerBackend[backends[0]]; got != numRPCs {
		t.Errorf("Got %v RPCs routed to %v, want %v", got, backends[0], numRPCs)
	}
}

// TestRingHash_IdleToReady tests that the channel will go from idle to ready
// via connecting; (though it is not possible to catch the connecting state
// before moving to ready via the public API).
// TODO: we should be able to catch all state transitions by using the internal.SubscribeToConnectivityStateChanges API.
func (s) TestRingHash_IdleToReady(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 1))

	const clusterName = "cluster"
	endpoints := endpointResource(t, clusterName, backends)
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	route := channelIDHashRoute("new_route", virtualHostName, clusterName)
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	// The channel starts IDLE; the first RPC below triggers connecting.
	testutils.AwaitState(ctx, t, conn, connectivity.Idle)

	client := testgrpc.NewTestServiceClient(conn)
	checkRPCSendOK(ctx, t, client, 1)
	testutils.AwaitState(ctx, t, conn, connectivity.Ready)
}

// Test that the channel will transition to READY once it starts
// connecting even if there are no RPCs being sent to the picker.
func (s) TestRingHash_ContinuesConnectingWithoutPicks(t *testing.T) {
	backend := stubserver.StartTestService(t, &stubserver.StubServer{
		// We expect the server EmptyCall to not be call here because the
		// aggregated channel state is never READY when the call is pending.
		EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
			t.Errorf("EmptyCall() should not have been called")
			return &testpb.Empty{}, nil
		},
	})
	defer backend.Stop()

	unReachableServerAddr := makeUnreachableBackends(t, 1)[0]

	const clusterName = "cluster"
	endpoints := endpointResource(t, clusterName, []string{backend.Address, unReachableServerAddr})
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	// Blocking dialer lets the test pause the connection attempt to the live
	// backend while the RPC (hashed to the unreachable one) is pending.
	dialer := testutils.NewBlockingDialer()
	dopts := []grpc.DialOption{
		grpc.WithResolvers(xdsResolver),
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithContextDialer(dialer.DialContext),
	}
	conn, err := grpc.NewClient("xds:///test.server", dopts...)
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	hold := dialer.Hold(backend.Address)

	rpcCtx, rpcCancel := context.WithCancel(ctx)
	go func() {
		// Hash this RPC to the unreachable backend's ring entry.
		rpcCtx = metadata.NewOutgoingContext(rpcCtx, metadata.Pairs("address_hash", unReachableServerAddr+"_0"))
		_, err := client.EmptyCall(rpcCtx, &testpb.Empty{})
		if status.Code(err) != codes.Canceled {
			t.Errorf("Expected RPC to be canceled, got error: %v", err)
		}
	}()

	// Wait for the connection attempt to the real backend.
	if !hold.Wait(ctx) {
		t.Fatalf("Timeout waiting for connection attempt to backend %v.", backend.Address)
	}
	// Now cancel the RPC while we are still connecting.
	rpcCancel()

	// This allows the connection attempts to continue. The RPC was cancelled
	// before the backend was connected, but the backend is up. The conn
	// becomes Ready due to the connection attempt to the existing backend
	// succeeding, despite no new RPC being sent.
	hold.Resume()

	testutils.AwaitState(ctx, t, conn, connectivity.Ready)
}

// Tests that when the first pick is down leading to a transient failure, we
// will move on to the next ring hash entry.
func (s) TestRingHash_TransientFailureCheckNextOne(t *testing.T) {
	backends := backendAddrs(startTestServiceBackends(t, 1))
	unReachableBackends := makeUnreachableBackends(t, 1)

	const clusterName = "cluster"
	// Put the unreachable address first in the EDS resource.
	endpoints := endpointResource(t, clusterName, append(unReachableBackends, backends...))
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Note each type of RPC contains a header value that will always be hashed
	// the value that was used to place the non-existent endpoint on the ring,
	// but it still gets routed to the backend that is up.
	ctx = metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", unReachableBackends[0]+"_0"))
	reqPerBackend := checkRPCSendOK(ctx, t, client, 1)
	// Extract the single backend address the RPC landed on.
	var got string
	for got = range reqPerBackend {
	}
	if want := backends[0]; got != want {
		t.Errorf("Got RPC routed to addr %v, want %v", got, want)
	}
}

// Tests for a bug seen in the wild in c-core, where ring_hash started with no
// endpoints and reported TRANSIENT_FAILURE, then got an update with endpoints
// and reported IDLE, but the picker update was squelched, so it failed to ever
// get reconnected.
func (s) TestRingHash_ReattemptWhenGoingFromTransientFailureToIdle(t *testing.T) {
	const clusterName = "cluster"
	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: clusterName,
		Localities:  []e2e.LocalityOptions{{}}, // note the empty locality (no endpoint).
	})
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	route := e2e.DefaultRouteConfig("new_route", virtualHostName, clusterName)
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	testutils.AwaitState(ctx, t, conn, connectivity.Idle)

	// There are no endpoints in EDS. RPCs should fail and the channel should
	// transition to transient failure.
	client := testgrpc.NewTestServiceClient(conn)
	if _, err = client.EmptyCall(ctx, &testpb.Empty{}); err == nil {
		t.Errorf("rpc EmptyCall() succeeded, want error")
	}
	testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure)

	t.Log("Updating EDS with a new backend endpoint.")
	backends := backendAddrs(startTestServiceBackends(t, 1))
	endpoints = e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: clusterName,
		Localities: []e2e.LocalityOptions{{
			Backends: backendOptions(t, backends),
			Weight:   1,
		}},
	})
	if err = xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	// A WaitForReady RPC should succeed, and the channel should report READY.
	if _, err = client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
		t.Errorf("rpc EmptyCall() failed: %v", err)
	}
	testutils.AwaitState(ctx, t, conn, connectivity.Ready)
}

// Tests that when all backends are down and then up, we may pick a TF backend
// and we will then jump to ready backend.
func (s) TestRingHash_TransientFailureSkipToAvailableReady(t *testing.T) {
	emptyCallF := func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
		return &testpb.Empty{}, nil
	}
	// Two restartable servers let the test bring individual backends down and
	// back up without changing the xDS configuration.
	lis, err := testutils.LocalTCPListener()
	if err != nil {
		t.Fatalf("Failed to create listener: %v", err)
	}
	restartableListener1 := testutils.NewRestartableListener(lis)
	restartableServer1 := stubserver.StartTestService(t, &stubserver.StubServer{
		Listener:   restartableListener1,
		EmptyCallF: emptyCallF,
	})
	defer restartableServer1.Stop()

	lis, err = testutils.LocalTCPListener()
	if err != nil {
		t.Fatalf("Failed to create listener: %v", err)
	}
	restartableListener2 := testutils.NewRestartableListener(lis)
	restartableServer2 := stubserver.StartTestService(t, &stubserver.StubServer{
		Listener:   restartableListener2,
		EmptyCallF: emptyCallF,
	})
	defer restartableServer2.Stop()

	// Backends that never accept connections; presumably these pad the ring
	// with entries that can never become READY so the picker has entries to
	// skip over (see the comment below about unused ports).
	unReachableBackends := makeUnreachableBackends(t, 2)

	const clusterName = "cluster"
	backends := []string{restartableServer1.Address, restartableServer2.Address}
	backends = append(backends, unReachableBackends...)
	endpoints := endpointResource(t, clusterName, backends)
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}
	opts := []grpc.DialOption{
		grpc.WithConnectParams(grpc.ConnectParams{
			// Disable backoff to speed up the test.
			MinConnectTimeout: 100 * time.Millisecond,
		}),
		grpc.WithResolvers(xdsResolver),
		grpc.WithTransportCredentials(insecure.NewCredentials()),
	}
	conn, err := grpc.NewClient("xds:///test.server", opts...)
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	testutils.AwaitState(ctx, t, conn, connectivity.Idle)

	// Test starts with backends not listening.
	restartableListener1.Stop()
	restartableListener2.Stop()

	// Send a request with a hash that should go to restartableServer1.
	// Because it is not accepting connections, and no other backend is
	// listening, the RPC fails.
	ctx = metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", restartableServer1.Address+"_0"))
	if _, err = client.EmptyCall(ctx, &testpb.Empty{}); err == nil {
		t.Fatalf("rpc EmptyCall() succeeded, want error")
	}

	testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure)

	// Bring up first backend. The channel should become Ready without any
	// picks, because in TF, we are always trying to connect to at least one
	// backend at all times.
	restartableListener1.Restart()
	testutils.AwaitState(ctx, t, conn, connectivity.Ready)

	// Bring down backend 1 and bring up backend 2.
	// Note the RPC contains a header value that will always be hashed to
	// backend 1. So by purposely bringing down backend 1 and bringing up
	// another backend, this will ensure Picker's first choice of backend 1
	// fails and it will go through the remaining subchannels to find one in
	// READY. Since the entries in the ring are pretty distributed and we have
	// unused ports to fill the ring, it is almost guaranteed that the Picker
	// will go through some non-READY entries and skip them as per design.
	t.Logf("bringing down backend 1")
	restartableListener1.Stop()

	testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure)
	if _, err = client.EmptyCall(ctx, &testpb.Empty{}); err == nil {
		t.Fatalf("rpc EmptyCall() succeeded, want error")
	}

	t.Logf("bringing up backend 2")
	restartableListener2.Restart()
	testutils.AwaitState(ctx, t, conn, connectivity.Ready)

	// Poll until an RPC is routed to backend 2, i.e. until the picker has
	// skipped the non-READY ring entries and landed on the READY one.
	wantPeerAddr := ""
	for wantPeerAddr != restartableServer2.Address {
		p := peer.Peer{}
		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&p)); errors.Is(err, context.DeadlineExceeded) {
			t.Fatalf("Timed out waiting for rpc EmptyCall() to be routed to the expected backend")
		}
		wantPeerAddr = p.Addr.String()
	}
}

// Tests that when all backends are down, we keep reattempting.
1717 func (s) TestRingHash_ReattemptWhenAllEndpointsUnreachable(t *testing.T) { 1718 lis, err := testutils.LocalTCPListener() 1719 if err != nil { 1720 t.Fatalf("Failed to create listener: %v", err) 1721 } 1722 restartableListener := testutils.NewRestartableListener(lis) 1723 restartableServer := stubserver.StartTestService(t, &stubserver.StubServer{ 1724 Listener: restartableListener, 1725 EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) { 1726 return &testpb.Empty{}, nil 1727 }, 1728 }) 1729 defer restartableServer.Stop() 1730 1731 const clusterName = "cluster" 1732 endpoints := endpointResource(t, clusterName, []string{restartableServer.Address}) 1733 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1734 ClusterName: clusterName, 1735 ServiceName: clusterName, 1736 Policy: e2e.LoadBalancingPolicyRingHash, 1737 }) 1738 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 1739 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1740 1741 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1742 defer cancel() 1743 1744 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1745 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1746 t.Fatalf("Failed to update xDS resources: %v", err) 1747 } 1748 1749 dopts := []grpc.DialOption{ 1750 grpc.WithResolvers(xdsResolver), 1751 grpc.WithTransportCredentials(insecure.NewCredentials()), 1752 grpc.WithConnectParams(fastConnectParams), 1753 } 1754 conn, err := grpc.NewClient("xds:///test.server", dopts...) 
1755 if err != nil { 1756 t.Fatalf("Failed to create client: %s", err) 1757 } 1758 defer conn.Close() 1759 client := testgrpc.NewTestServiceClient(conn) 1760 1761 testutils.AwaitState(ctx, t, conn, connectivity.Idle) 1762 1763 t.Log("Stopping the backend server") 1764 restartableListener.Stop() 1765 1766 if _, err = client.EmptyCall(ctx, &testpb.Empty{}); status.Code(err) != codes.Unavailable { 1767 t.Fatalf("rpc EmptyCall() succeeded, want Unavailable error") 1768 } 1769 1770 // Wait for channel to fail. 1771 testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure) 1772 1773 t.Log("Restarting the backend server") 1774 restartableListener.Restart() 1775 1776 // Wait for channel to become READY without any pending RPC. 1777 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 1778 } 1779 1780 // Tests that when a backend goes down, we will move on to the next subchannel 1781 // (with a lower priority). When the backend comes back up, traffic will move 1782 // back. 1783 func (s) TestRingHash_SwitchToLowerPriorityAndThenBack(t *testing.T) { 1784 lis, err := testutils.LocalTCPListener() 1785 if err != nil { 1786 t.Fatalf("Failed to create listener: %v", err) 1787 } 1788 restartableListener := testutils.NewRestartableListener(lis) 1789 restartableServer := stubserver.StartTestService(t, &stubserver.StubServer{ 1790 Listener: restartableListener, 1791 EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) { 1792 return &testpb.Empty{}, nil 1793 }, 1794 }) 1795 defer restartableServer.Stop() 1796 1797 otherBackend := backendAddrs(startTestServiceBackends(t, 1))[0] 1798 1799 // We must set the host name socket address in EDS, as the ring hash policy 1800 // uses it to construct the ring. 
1801 host, _, err := net.SplitHostPort(otherBackend) 1802 if err != nil { 1803 t.Fatalf("Failed to split host and port from stubserver: %v", err) 1804 } 1805 1806 const clusterName = "cluster" 1807 endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 1808 ClusterName: clusterName, 1809 Host: host, 1810 Localities: []e2e.LocalityOptions{{ 1811 Backends: backendOptions(t, []string{restartableServer.Address}), 1812 Weight: 1, 1813 }, { 1814 Backends: backendOptions(t, []string{otherBackend}), 1815 Weight: 1, 1816 Priority: 1, 1817 }}}) 1818 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1819 ClusterName: clusterName, 1820 ServiceName: clusterName, 1821 Policy: e2e.LoadBalancingPolicyRingHash, 1822 }) 1823 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 1824 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1825 1826 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1827 defer cancel() 1828 1829 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1830 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1831 t.Fatalf("Failed to update xDS resources: %v", err) 1832 } 1833 1834 dopts := []grpc.DialOption{ 1835 grpc.WithResolvers(xdsResolver), 1836 grpc.WithTransportCredentials(insecure.NewCredentials()), 1837 grpc.WithConnectParams(fastConnectParams), 1838 } 1839 conn, err := grpc.NewClient("xds:///test.server", dopts...) 1840 if err != nil { 1841 t.Fatalf("Failed to create client: %s", err) 1842 } 1843 defer conn.Close() 1844 client := testgrpc.NewTestServiceClient(conn) 1845 1846 // Note each type of RPC contains a header value that will always be hashed 1847 // to the value that was used to place the non-existent endpoint on the ring. 
1848 ctx = metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", restartableServer.Address+"_0")) 1849 var got string 1850 for got = range checkRPCSendOK(ctx, t, client, 1) { 1851 } 1852 if want := restartableServer.Address; got != want { 1853 t.Fatalf("Got RPC routed to addr %v, want %v", got, want) 1854 } 1855 1856 // Trigger failure with the existing backend, which should cause the 1857 // balancer to go in transient failure and the priority balancer to move 1858 // to the lower priority. 1859 restartableListener.Stop() 1860 1861 for { 1862 p := peer.Peer{} 1863 _, err = client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true), grpc.Peer(&p)) 1864 1865 // Ignore errors: we may need to attempt to send an RPC to detect the 1866 // failure (the next write on connection fails). 1867 if err == nil { 1868 if got, want := p.Addr.String(), otherBackend; got != want { 1869 t.Fatalf("Got RPC routed to addr %v, want %v", got, want) 1870 } 1871 break 1872 } 1873 } 1874 1875 // Now we start the backend with the address hash that is used in the 1876 // metadata, so eventually RPCs should be routed to it, since it is in a 1877 // locality with higher priority. 1878 peerAddr := "" 1879 restartableListener.Restart() 1880 for peerAddr != restartableServer.Address { 1881 p := peer.Peer{} 1882 _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&p)) 1883 if errors.Is(err, context.DeadlineExceeded) { 1884 t.Fatalf("Timed out waiting for rpc EmptyCall() to be routed to the expected backend") 1885 } 1886 peerAddr = p.Addr.String() 1887 } 1888 } 1889 1890 // Tests that when we trigger internal connection attempts without picks, we 1891 // keep retrying all the SubConns that have reported TF previously. 
1892 func (s) TestRingHash_ContinuesConnectingWithoutPicksToMultipleSubConnsConcurrently(t *testing.T) { 1893 const backendsCount = 4 1894 backends := backendAddrs(startTestServiceBackends(t, backendsCount)) 1895 1896 const clusterName = "cluster" 1897 1898 endpoints := endpointResource(t, clusterName, backends) 1899 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 1900 ClusterName: clusterName, 1901 ServiceName: clusterName, 1902 Policy: e2e.LoadBalancingPolicyRingHash, 1903 }) 1904 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 1905 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 1906 1907 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1908 defer cancel() 1909 1910 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 1911 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 1912 t.Fatalf("Failed to update xDS resources: %v", err) 1913 } 1914 1915 dialer := testutils.NewBlockingDialer() 1916 dialOpts := []grpc.DialOption{ 1917 grpc.WithResolvers(xdsResolver), 1918 grpc.WithTransportCredentials(insecure.NewCredentials()), 1919 grpc.WithContextDialer(dialer.DialContext), 1920 grpc.WithConnectParams(fastConnectParams), 1921 } 1922 conn, err := grpc.NewClient("xds:///test.server", dialOpts...) 1923 if err != nil { 1924 t.Fatalf("Failed to create client: %s", err) 1925 } 1926 defer conn.Close() 1927 1928 // Create holds for each backend address to delay a successful connection 1929 // until the end of the test. 
1930 holds := make([]*testutils.Hold, backendsCount) 1931 for i := 0; i < len(backends); i++ { 1932 holds[i] = dialer.Hold(backends[i]) 1933 } 1934 1935 client := testgrpc.NewTestServiceClient(conn) 1936 1937 rpcCtx, rpcCancel := context.WithCancel(ctx) 1938 errCh := make(chan error, 1) 1939 go func() { 1940 rpcCtx = metadata.NewOutgoingContext(rpcCtx, metadata.Pairs("address_hash", backends[0]+"_0")) 1941 _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}) 1942 if status.Code(err) == codes.Canceled { 1943 errCh <- nil 1944 return 1945 } 1946 errCh <- err 1947 }() 1948 1949 // Wait for the RPC to trigger a connection attempt to the first address, 1950 // then cancel the RPC. No other connection attempts should be started yet. 1951 if !holds[0].Wait(ctx) { 1952 t.Fatalf("Timeout waiting for connection attempt to backend 0") 1953 } 1954 rpcCancel() 1955 if err := <-errCh; err != nil { 1956 t.Fatalf("Expected RPC to fail be canceled, got %v", err) 1957 } 1958 1959 // In every iteration of the following loop, we count the number of backends 1960 // that are dialed. After counting, we fail all the connection attempts. 1961 // This should cause the number of dialed backends to increase by 1 in every 1962 // iteration of the loop as ringhash tries to exit TRANSIENT_FAILURE. 
1963 activeAddrs := map[string]bool{} 1964 for wantBackendCount := 1; wantBackendCount <= backendsCount; wantBackendCount++ { 1965 newAddrIdx := -1 1966 for ; ctx.Err() == nil; <-time.After(time.Millisecond) { 1967 for i, hold := range holds { 1968 if !hold.IsStarted() { 1969 continue 1970 } 1971 if _, ok := activeAddrs[backends[i]]; ok { 1972 continue 1973 } 1974 activeAddrs[backends[i]] = true 1975 newAddrIdx = i 1976 } 1977 if len(activeAddrs) > wantBackendCount { 1978 t.Fatalf("More backends dialed than expected: got %d, want %d", len(activeAddrs), wantBackendCount) 1979 } 1980 if len(activeAddrs) == wantBackendCount { 1981 break 1982 } 1983 } 1984 1985 // Wait for a short time and verify no more backends are contacted. 1986 <-time.After(defaultTestShortTimeout) 1987 for i, hold := range holds { 1988 if !hold.IsStarted() { 1989 continue 1990 } 1991 activeAddrs[backends[i]] = true 1992 } 1993 if len(activeAddrs) != wantBackendCount { 1994 t.Fatalf("Unexpected number of backends dialed: got %d, want %d", len(activeAddrs), wantBackendCount) 1995 } 1996 1997 // Create a new hold for the address dialed in this iteration and fail 1998 // the existing hold. 1999 hold := holds[newAddrIdx] 2000 holds[newAddrIdx] = dialer.Hold(backends[newAddrIdx]) 2001 hold.Fail(errors.New("Test error")) 2002 } 2003 2004 // Allow the request to a backend to succeed. 2005 if !holds[1].Wait(ctx) { 2006 t.Fatalf("Context timed out waiting %q to be dialed again.", backends[1]) 2007 } 2008 holds[1].Resume() 2009 2010 // Wait for channel to become READY without any pending RPC. 2011 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 2012 } 2013 2014 // Tests that first address of an endpoint is used to generate the ring. The 2015 // test sends a request to a random endpoint. The test then reverses the 2016 // addresses of every endpoint and verifies that an RPC with header pointing to 2017 // the second address of the endpoint is sent to the initial address. 
The test
// then swaps the second and third address of the endpoint and verifies that an
// RPC with the header used earlier still reaches the same backend.
func (s) TestRingHash_ReorderAddressessWithinEndpoint(t *testing.T) {
	// Dualstack endpoint support is required for multi-address endpoints;
	// restore the env flag when the test ends.
	origDualstackEndpointsEnabled := envconfig.XDSDualstackEndpointsEnabled
	defer func() {
		envconfig.XDSDualstackEndpointsEnabled = origDualstackEndpointsEnabled
	}()
	envconfig.XDSDualstackEndpointsEnabled = true
	backends := backendAddrs(startTestServiceBackends(t, 6))

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)

	const clusterName = "cluster"
	// Two endpoints with three addresses each.
	addrGroups := [][]string{
		{backends[0], backends[1], backends[2]},
		{backends[3], backends[4], backends[5]},
	}
	endpoints := endpointResourceForBackendsWithMultipleAddrs(t, clusterName, addrGroups)
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Use a random hash so the RPC lands on an arbitrary endpoint; only the
	// first address of an endpoint can be the connection target.
	rpcCtx := metadata.NewOutgoingContext(ctx, metadata.Pairs(
		"address_hash", fmt.Sprintf("%d", rand.Int()),
	))
	var remote peer.Peer
	if _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil {
		t.Fatalf("rpc EmptyCall() failed: %v", err)
	}

	initialFirstAddr := ""
	newFirstAddr := ""
	switch remote.Addr.String() {
	case addrGroups[0][0]:
		initialFirstAddr = addrGroups[0][0]
		newFirstAddr = addrGroups[0][2]
	case addrGroups[1][0]:
		initialFirstAddr = addrGroups[1][0]
		newFirstAddr = addrGroups[1][2]
	default:
		t.Fatalf("Request went to unexpected address: %q", remote.Addr)
	}

	t.Log("Reversing addresses within each endpoint.")
	addrGroups1 := [][]string{
		{addrGroups[0][2], addrGroups[0][1], addrGroups[0][0]},
		{addrGroups[1][2], addrGroups[1][1], addrGroups[1][0]},
	}
	endpoints = endpointResourceForBackendsWithMultipleAddrs(t, clusterName, addrGroups1)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	// The first address of an endpoint is used to create the ring. This means
	// that requests should continue to go to the first address, but the hash
	// should be computed based on the last address in the original list.
	for ; ctx.Err() == nil; <-time.After(time.Millisecond) {
		rpcCtx := metadata.NewOutgoingContext(ctx, metadata.Pairs(
			"address_hash", newFirstAddr+"_0",
		))
		if _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil {
			t.Fatalf("rpc EmptyCall() failed: %v", err)
		}
		if remote.Addr.String() == initialFirstAddr {
			break
		}
	}

	if ctx.Err() != nil {
		t.Fatalf("Context timed out waiting for request to be sent to %q, last request went to %q", initialFirstAddr, remote.Addr)
	}

	t.Log("Swapping the second and third addresses within each endpoint.")
	// This should not affect the ring, since only the first address is used
	// by the ring.
	addrGroups2 := [][]string{
		{addrGroups1[0][0], addrGroups[0][2], addrGroups[0][1]},
		{addrGroups1[1][0], addrGroups[1][2], addrGroups[1][1]},
	}
	endpoints = endpointResourceForBackendsWithMultipleAddrs(t, clusterName, addrGroups2)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	// Verify that requests with the hash of the last address in chosenAddrGroup
	// continue reaching the first address in chosenAddrGroup. A short timeout
	// is used here because success is the absence of a change.
	shortCtx, cancel := context.WithTimeout(ctx, defaultTestShortTimeout)
	defer cancel()
	for ; shortCtx.Err() == nil; <-time.After(time.Millisecond) {
		rpcCtx := metadata.NewOutgoingContext(ctx, metadata.Pairs(
			"address_hash", newFirstAddr+"_0",
		))
		if _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil {
			t.Fatalf("rpc EmptyCall() failed: %v", err)
		}
		if remote.Addr.String() == initialFirstAddr {
			continue
		}
		t.Fatalf("Request went to unexpected backend %q, want backend %q", remote.Addr, initialFirstAddr)
	}
}

// Tests that requests are sent to the next address within the same endpoint
// after the first address becomes unreachable.
func (s) TestRingHash_FallBackWithinEndpoint(t *testing.T) {
	// Dualstack endpoint support is required for multi-address endpoints;
	// restore the env flag when the test ends.
	origDualstackEndpointsEnabled := envconfig.XDSDualstackEndpointsEnabled
	defer func() {
		envconfig.XDSDualstackEndpointsEnabled = origDualstackEndpointsEnabled
	}()
	envconfig.XDSDualstackEndpointsEnabled = true
	backends := startTestServiceBackends(t, 4)
	backendAddrs := backendAddrs(backends)

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)

	const clusterName = "cluster"
	// Two endpoints with two addresses each: {0,1} and {2,3}.
	endpoints := endpointResourceForBackendsWithMultipleAddrs(t, clusterName, [][]string{{backendAddrs[0], backendAddrs[1]}, {backendAddrs[2], backendAddrs[3]}})
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	route := channelIDHashRoute("new_route", virtualHostName, clusterName)
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	conn, err := grpc.NewClient("xds:///test.server", grpc.WithResolvers(xdsResolver), grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// With channel ID hashing, every RPC on this channel hashes to the same
	// ring entry, so all RPCs must land on a single backend.
	const numRPCs = 5
	received := checkRPCSendOK(ctx, t, client, numRPCs)
	if len(received) != 1 {
		t.Errorf("Got RPCs routed to %v backends, want %v", len(received), 1)
	}
	var got int
	var initialAddr string
	for initialAddr, got = range received {
	}
	if got != numRPCs {
		t.Errorf("Got %v RPCs routed to a backend, want %v", got, numRPCs)
	}

	// Due to the channel ID hashing policy, the request could go to the first
	// address of either endpoint.
	var backendIdx int
	switch initialAddr {
	case backendAddrs[0]:
		backendIdx = 0
	case backendAddrs[2]:
		backendIdx = 2
	default:
		t.Fatalf("Request sent to unexpected backend: %q", initialAddr)
	}
	// Despite the name, this is the second address of the SAME endpoint
	// ({0,1} or {2,3}), i.e. the expected fallback target.
	otherEndpointAddr := backendAddrs[backendIdx+1]

	// Shut down the previously used backend.
	backends[backendIdx].Stop()
	testutils.AwaitState(ctx, t, conn, connectivity.Idle)

	// Verify that the requests go to the remaining address in the same
	// endpoint.
	received = checkRPCSendOK(ctx, t, client, numRPCs)
	if len(received) != 1 {
		t.Errorf("Got RPCs routed to %v backends, want %v", len(received), 1)
	}
	var newAddr string
	for newAddr, got = range received {
	}
	if got != numRPCs {
		t.Errorf("Got %v RPCs routed to a backend, want %v", got, numRPCs)
	}

	if newAddr != otherEndpointAddr {
		t.Errorf("Requests went to unexpected address, got=%q, want=%q", newAddr, otherEndpointAddr)
	}
}

// Tests that ringhash is able to recover automatically in situations when a
// READY endpoint enters IDLE making the aggregated state TRANSIENT_FAILURE. The
// test creates 4 endpoints in the following connectivity states: [TF, TF,
// READY, IDLE]. The test fails the READY backend and verifies that the last
// IDLE endpoint is dialed and the channel enters READY.
2228 func (s) TestRingHash_RecoverWhenEndpointEntersIdle(t *testing.T) { 2229 const backendsCount = 4 2230 backends := startTestServiceBackends(t, backendsCount) 2231 backendAddrs := backendAddrs(backends) 2232 2233 const clusterName = "cluster" 2234 2235 endpoints := endpointResource(t, clusterName, backendAddrs) 2236 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 2237 ClusterName: clusterName, 2238 ServiceName: clusterName, 2239 Policy: e2e.LoadBalancingPolicyRingHash, 2240 }) 2241 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 2242 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 2243 2244 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 2245 defer cancel() 2246 2247 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 2248 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 2249 t.Fatalf("Failed to update xDS resources: %v", err) 2250 } 2251 2252 dialer := testutils.NewBlockingDialer() 2253 dialOpts := []grpc.DialOption{ 2254 grpc.WithResolvers(xdsResolver), 2255 grpc.WithTransportCredentials(insecure.NewCredentials()), 2256 grpc.WithContextDialer(dialer.DialContext), 2257 grpc.WithConnectParams(fastConnectParams), 2258 } 2259 conn, err := grpc.NewClient("xds:///test.server", dialOpts...) 2260 if err != nil { 2261 t.Fatalf("Failed to create client: %s", err) 2262 } 2263 defer conn.Close() 2264 2265 // Create holds for each backend address to delay a successful connection 2266 // until the end of the test. 
2267 holds := make([]*testutils.Hold, backendsCount) 2268 for i := 0; i < len(backendAddrs); i++ { 2269 holds[i] = dialer.Hold(backendAddrs[i]) 2270 } 2271 2272 client := testgrpc.NewTestServiceClient(conn) 2273 2274 rpcCtx, rpcCancel := context.WithCancel(ctx) 2275 errCh := make(chan error, 1) 2276 go func() { 2277 rpcCtx = metadata.NewOutgoingContext(rpcCtx, metadata.Pairs("address_hash", backendAddrs[0]+"_0")) 2278 _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}) 2279 if status.Code(err) == codes.Canceled { 2280 errCh <- nil 2281 return 2282 } 2283 errCh <- err 2284 }() 2285 2286 // Wait for the RPC to trigger a connection attempt to the first address, 2287 // then cancel the RPC. No other connection attempts should be started yet. 2288 if !holds[0].Wait(ctx) { 2289 t.Fatalf("Timeout waiting for connection attempt to backend 0") 2290 } 2291 rpcCancel() 2292 if err := <-errCh; err != nil { 2293 t.Fatalf("Expected RPC to fail be canceled, got %v", err) 2294 } 2295 2296 // The number of dialed backends increases by 1 in every iteration of the 2297 // loop as ringhash tries to exit TRANSIENT_FAILURE. Run the loop twice to 2298 // get two endpoints in TRANSIENT_FAILURE. 
2299 activeAddrs := map[string]bool{} 2300 for wantFailingBackendCount := 1; wantFailingBackendCount <= 2; wantFailingBackendCount++ { 2301 newAddrIdx := -1 2302 for ; ctx.Err() == nil && len(activeAddrs) < wantFailingBackendCount; <-time.After(time.Millisecond) { 2303 for i, hold := range holds { 2304 if !hold.IsStarted() { 2305 continue 2306 } 2307 if _, ok := activeAddrs[backendAddrs[i]]; ok { 2308 continue 2309 } 2310 activeAddrs[backendAddrs[i]] = true 2311 newAddrIdx = i 2312 } 2313 } 2314 2315 if ctx.Err() != nil { 2316 t.Fatal("Context timed out waiting for new backneds to be dialed.") 2317 } 2318 if len(activeAddrs) > wantFailingBackendCount { 2319 t.Fatalf("More backends dialed than expected: got %d, want %d", len(activeAddrs), wantFailingBackendCount) 2320 } 2321 2322 // Create a new hold for the address dialed in this iteration and fail 2323 // the existing hold. 2324 hold := holds[newAddrIdx] 2325 holds[newAddrIdx] = dialer.Hold(backendAddrs[newAddrIdx]) 2326 hold.Fail(errors.New("Test error")) 2327 } 2328 2329 // Current state of endpoints: [TF, TF, READY, IDLE]. 2330 // Two endpoints failing should cause the channel to enter 2331 // TRANSIENT_FAILURE. 2332 testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure) 2333 2334 // Allow the request to the backend dialed next to succeed. 2335 readyBackendIdx := -1 2336 for ; ctx.Err() == nil && readyBackendIdx == -1; <-time.After(time.Millisecond) { 2337 for i, addr := range backendAddrs { 2338 if _, ok := activeAddrs[addr]; ok || !holds[i].IsStarted() { 2339 continue 2340 } 2341 readyBackendIdx = i 2342 activeAddrs[addr] = true 2343 holds[i].Resume() 2344 break 2345 } 2346 } 2347 2348 if ctx.Err() != nil { 2349 t.Fatal("Context timed out waiting for the next backend to be contacted.") 2350 } 2351 2352 // Wait for channel to become READY without any pending RPC. 2353 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 2354 2355 // Current state of endpoints: [TF, TF, READY, IDLE]. 
2356 // Stopping the READY backend should cause the channel to re-enter 2357 // TRANSIENT_FAILURE. 2358 backends[readyBackendIdx].Stop() 2359 testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure) 2360 2361 // To recover from TRANSIENT_FAILURE, ringhash should automatically try to 2362 // connect to the final endpoint. 2363 readyBackendIdx = -1 2364 for ; ctx.Err() == nil && readyBackendIdx == -1; <-time.After(time.Millisecond) { 2365 for i, addr := range backendAddrs { 2366 if _, ok := activeAddrs[addr]; ok || !holds[i].IsStarted() { 2367 continue 2368 } 2369 readyBackendIdx = i 2370 activeAddrs[addr] = true 2371 holds[i].Resume() 2372 break 2373 } 2374 } 2375 2376 if ctx.Err() != nil { 2377 t.Fatal("Context timed out waiting for next backend to be contacted.") 2378 } 2379 2380 // Wait for channel to become READY without any pending RPC. 2381 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 2382 } 2383 2384 // Tests that ringhash is able to recover automatically in situations when a 2385 // READY endpoint is removed by the resolver making the aggregated state 2386 // TRANSIENT_FAILURE. The test creates 4 endpoints in the following 2387 // connectivity states: [TF, TF, READY, IDLE]. The test removes the 2388 // READY endpoint and verifies that the last IDLE endopint is dialed and the 2389 // channel enters READY. 
2390 func (s) TestRingHash_RecoverWhenResolverRemovesEndpoint(t *testing.T) { 2391 const backendsCount = 4 2392 backends := startTestServiceBackends(t, backendsCount) 2393 backendAddrs := backendAddrs(backends) 2394 2395 const clusterName = "cluster" 2396 2397 endpoints := endpointResource(t, clusterName, backendAddrs) 2398 cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{ 2399 ClusterName: clusterName, 2400 ServiceName: clusterName, 2401 Policy: e2e.LoadBalancingPolicyRingHash, 2402 }) 2403 route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash") 2404 listener := e2e.DefaultClientListener(virtualHostName, route.Name) 2405 2406 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 2407 defer cancel() 2408 2409 xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t) 2410 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil { 2411 t.Fatalf("Failed to update xDS resources: %v", err) 2412 } 2413 2414 dialer := testutils.NewBlockingDialer() 2415 dialOpts := []grpc.DialOption{ 2416 grpc.WithResolvers(xdsResolver), 2417 grpc.WithTransportCredentials(insecure.NewCredentials()), 2418 grpc.WithContextDialer(dialer.DialContext), 2419 grpc.WithConnectParams(fastConnectParams), 2420 } 2421 conn, err := grpc.NewClient("xds:///test.server", dialOpts...) 2422 if err != nil { 2423 t.Fatalf("Failed to create client: %s", err) 2424 } 2425 defer conn.Close() 2426 2427 // Create holds for each backend address to delay a successful connection 2428 // until the end of the test. 
2429 holds := make([]*testutils.Hold, backendsCount) 2430 for i := 0; i < len(backendAddrs); i++ { 2431 holds[i] = dialer.Hold(backendAddrs[i]) 2432 } 2433 2434 client := testgrpc.NewTestServiceClient(conn) 2435 2436 rpcCtx, rpcCancel := context.WithCancel(ctx) 2437 errCh := make(chan error, 1) 2438 go func() { 2439 rpcCtx = metadata.NewOutgoingContext(rpcCtx, metadata.Pairs("address_hash", backendAddrs[0]+"_0")) 2440 _, err := client.EmptyCall(rpcCtx, &testpb.Empty{}) 2441 if status.Code(err) == codes.Canceled { 2442 errCh <- nil 2443 return 2444 } 2445 errCh <- err 2446 }() 2447 2448 // Wait for the RPC to trigger a connection attempt to the first address, 2449 // then cancel the RPC. No other connection attempts should be started yet. 2450 if !holds[0].Wait(ctx) { 2451 t.Fatalf("Timeout waiting for connection attempt to backend 0") 2452 } 2453 rpcCancel() 2454 if err := <-errCh; err != nil { 2455 t.Fatalf("Expected RPC to fail be canceled, got %v", err) 2456 } 2457 2458 // The number of dialed backends increases by 1 in every iteration of the 2459 // loop as ringhash tries to exit TRANSIENT_FAILURE. Run the loop twice to 2460 // get two endpoints in TRANSIENT_FAILURE. 
2461 activeAddrs := map[string]bool{} 2462 for wantFailingBackendCount := 1; wantFailingBackendCount <= 2; wantFailingBackendCount++ { 2463 newAddrIdx := -1 2464 for ; ctx.Err() == nil && len(activeAddrs) < wantFailingBackendCount; <-time.After(time.Millisecond) { 2465 for i, hold := range holds { 2466 if !hold.IsStarted() { 2467 continue 2468 } 2469 if _, ok := activeAddrs[backendAddrs[i]]; ok { 2470 continue 2471 } 2472 activeAddrs[backendAddrs[i]] = true 2473 newAddrIdx = i 2474 } 2475 } 2476 2477 if ctx.Err() != nil { 2478 t.Fatal("Context timed out waiting for new backneds to be dialed.") 2479 } 2480 if len(activeAddrs) > wantFailingBackendCount { 2481 t.Fatalf("More backends dialed than expected: got %d, want %d", len(activeAddrs), wantFailingBackendCount) 2482 } 2483 2484 // Create a new hold for the address dialed in this iteration and fail 2485 // the existing hold. 2486 hold := holds[newAddrIdx] 2487 holds[newAddrIdx] = dialer.Hold(backendAddrs[newAddrIdx]) 2488 hold.Fail(errors.New("Test error")) 2489 } 2490 2491 // Current state of endpoints: [TF, TF, READY, IDLE]. 2492 // Two endpoints failing should cause the channel to enter 2493 // TRANSIENT_FAILURE. 2494 testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure) 2495 2496 // Allow the request to the backend dialed next to succeed. 2497 readyBackendIdx := -1 2498 for ; ctx.Err() == nil && readyBackendIdx == -1; <-time.After(time.Millisecond) { 2499 for i, addr := range backendAddrs { 2500 if _, ok := activeAddrs[addr]; ok || !holds[i].IsStarted() { 2501 continue 2502 } 2503 readyBackendIdx = i 2504 activeAddrs[addr] = true 2505 holds[i].Resume() 2506 break 2507 } 2508 } 2509 2510 if ctx.Err() != nil { 2511 t.Fatal("Context timed out waiting for the next backend to be contacted.") 2512 } 2513 2514 // Wait for channel to become READY without any pending RPC. 2515 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 2516 2517 // Current state of endpoints: [TF, TF, READY, IDLE]. 
2518 // Removing the READY backend should cause the channel to re-enter 2519 // TRANSIENT_FAILURE. 2520 updatedAddrs := append([]string{}, backendAddrs[:readyBackendIdx]...) 2521 updatedAddrs = append(updatedAddrs, backendAddrs[readyBackendIdx+1:]...) 2522 updatedEndpoints := endpointResource(t, clusterName, updatedAddrs) 2523 if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, updatedEndpoints, cluster, route, listener)); err != nil { 2524 t.Fatalf("Failed to update xDS resources: %v", err) 2525 } 2526 testutils.AwaitState(ctx, t, conn, connectivity.TransientFailure) 2527 2528 // To recover from TRANSIENT_FAILURE, ringhash should automatically try to 2529 // connect to the final endpoint. 2530 readyBackendIdx = -1 2531 for ; ctx.Err() == nil && readyBackendIdx == -1; <-time.After(time.Millisecond) { 2532 for i, addr := range backendAddrs { 2533 if _, ok := activeAddrs[addr]; ok || !holds[i].IsStarted() { 2534 continue 2535 } 2536 readyBackendIdx = i 2537 activeAddrs[addr] = true 2538 holds[i].Resume() 2539 break 2540 } 2541 } 2542 2543 if ctx.Err() != nil { 2544 t.Fatal("Context timed out waiting for next backend to be contacted.") 2545 } 2546 2547 // Wait for channel to become READY without any pending RPC. 2548 testutils.AwaitState(ctx, t, conn, connectivity.Ready) 2549 } 2550 2551 // Tests that RPCs are routed according to endpoint hash key rather than 2552 // endpoint first address if it is set in EDS endpoint metadata. 
func (s) TestRingHash_EndpointHashKey(t *testing.T) {
	// Disable the backward-compatibility behavior so the "hash_key" endpoint
	// metadata (rather than the endpoint's first address) is used to place
	// endpoints on the ring.
	testutils.SetEnvConfig(t, &envconfig.XDSEndpointHashKeyBackwardCompat, false)

	backends := backendAddrs(startTestServiceBackends(t, 4))

	const clusterName = "cluster"
	// Give each backend a "hash_key" metadata entry equal to its index.
	var backendOpts []e2e.BackendOptions
	for i, addr := range backends {
		var ports []uint32
		ports = append(ports, testutils.ParsePort(t, addr))
		backendOpts = append(backendOpts, e2e.BackendOptions{
			Ports:    ports,
			Metadata: map[string]any{"hash_key": strconv.Itoa(i)},
		})
	}
	endpoints := e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: clusterName,
		Host:        "localhost",
		Localities: []e2e.LocalityOptions{{
			Backends: backendOpts,
			Weight:   1,
		}},
	})
	cluster := e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
		ClusterName: clusterName,
		ServiceName: clusterName,
		Policy:      e2e.LoadBalancingPolicyRingHash,
	})
	route := headerHashRoute("new_route", virtualHostName, clusterName, "address_hash")
	listener := e2e.DefaultClientListener(virtualHostName, route.Name)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	xdsServer, nodeID, xdsResolver := setupManagementServerAndResolver(t)
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	opts := []grpc.DialOption{
		grpc.WithResolvers(xdsResolver),
		grpc.WithTransportCredentials(insecure.NewCredentials()),
	}
	conn, err := grpc.NewClient("xds:///test.server", opts...)
	if err != nil {
		t.Fatalf("Failed to create client: %s", err)
	}
	defer conn.Close()
	client := testgrpc.NewTestServiceClient(conn)

	// Make sure RPCs are routed to backends according to the endpoint metadata
	// rather than their address. Note each type of RPC contains a header value
	// that will always be hashed to a specific backend as the header value
	// matches the endpoint metadata hash key.
	for i, backend := range backends {
		ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", strconv.Itoa(i)+"_0"))
		numRPCs := 10
		reqPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
		if reqPerBackend[backend] != numRPCs {
			t.Errorf("Got RPC routed to addresses %v, want all RPCs routed to %v", reqPerBackend, backend)
		}
	}

	// Update the endpoints to swap the metadata hash key: backend i now gets
	// key (len(backends)-i-1), i.e. the numbering is reversed.
	for i := range backendOpts {
		backendOpts[i].Metadata = map[string]any{"hash_key": strconv.Itoa(len(backends) - i - 1)}
	}
	endpoints = e2e.EndpointResourceWithOptions(e2e.EndpointOptions{
		ClusterName: clusterName,
		Host:        "localhost",
		Localities: []e2e.LocalityOptions{{
			Backends: backendOpts,
			Weight:   1,
		}},
	})
	if err := xdsServer.Update(ctx, xdsUpdateOpts(nodeID, endpoints, cluster, route, listener)); err != nil {
		t.Fatalf("Failed to update xDS resources: %v", err)
	}

	// Wait for the resolver update to make it to the balancer. This RPC should
	// be routed to backend 3 with the reverse numbering of the hash_key
	// attribute delivered above.
	for {
		ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", "0_0"))
		var remote peer.Peer
		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil {
			t.Fatalf("Unexpected RPC error waiting for EDS update propagation: %s", err)
		}
		if remote.Addr.String() == backends[3] {
			break
		}
	}

	// Now that the balancer has the new endpoint attributes, make sure RPCs are
	// routed to backends according to the new endpoint metadata.
	for i, backend := range backends {
		ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", strconv.Itoa(len(backends)-i-1)+"_0"))
		numRPCs := 10
		reqPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
		if reqPerBackend[backend] != numRPCs {
			t.Errorf("Got RPC routed to addresses %v, want all RPCs routed to %v", reqPerBackend, backend)
		}
	}
}

// Tests that when a request hash key is set in the balancer configuration via
// service config, this header is used to route to a specific backend.
func (s) TestRingHash_RequestHashKey(t *testing.T) {
	testutils.SetEnvConfig(t, &envconfig.RingHashSetRequestHashKey, true)

	backends := backendAddrs(startTestServiceBackends(t, 4))

	// Create a clientConn with a manual resolver (which is used to push the
	// address of the test backend), and a default service config pointing to
	// the use of the ring_hash_experimental LB policy with an explicit hash
	// header.
	const ringHashServiceConfig = `{"loadBalancingConfig": [{"ring_hash_experimental":{"requestHashHeader":"address_hash"}}]}`
	r := manual.NewBuilderWithScheme("whatever")
	dopts := []grpc.DialOption{
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithResolvers(r),
		grpc.WithDefaultServiceConfig(ringHashServiceConfig),
		grpc.WithConnectParams(fastConnectParams),
	}
	cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...)
	if err != nil {
		t.Fatalf("Failed to dial local test server: %v", err)
	}
	defer cc.Close()
	var endpoints []resolver.Endpoint
	for _, backend := range backends {
		endpoints = append(endpoints, resolver.Endpoint{
			Addresses: []resolver.Address{{Addr: backend}},
		})
	}
	r.UpdateState(resolver.State{
		Endpoints: endpoints,
	})
	client := testgrpc.NewTestServiceClient(cc)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Note each type of RPC contains a header value that will always be hashed
	// to a specific backend as the header value matches the value used to
	// create the entry in the ring.
	for _, backend := range backends {
		ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("address_hash", backend+"_0"))
		numRPCs := 10
		reqPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
		if reqPerBackend[backend] != numRPCs {
			t.Errorf("Got RPC routed to addresses %v, want all RPCs routed to %v", reqPerBackend, backend)
		}
	}

	// Switch the configured request hash header to "other_header" via a
	// service config update pushed through the manual resolver.
	const ringHashServiceConfigUpdate = `{"loadBalancingConfig": [{"ring_hash_experimental":{"requestHashHeader":"other_header"}}]}`
	r.UpdateState(resolver.State{
		Endpoints:     endpoints,
		ServiceConfig: (&testutils.ResolverClientConn{}).ParseServiceConfig(ringHashServiceConfigUpdate),
	})

	// Make sure that requests with the new hash are sent to the right backend.
	for _, backend := range backends {
		ctx := metadata.NewOutgoingContext(ctx, metadata.Pairs("other_header", backend+"_0"))
		numRPCs := 10
		reqPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
		if reqPerBackend[backend] != numRPCs {
			t.Errorf("Got RPC routed to addresses %v, want all RPCs routed to %v", reqPerBackend, backend)
		}
	}
}

// highRingSizeServiceConfig returns a ring_hash service config that requests
// both a min and max ring size of minRingSize entries. It also raises the
// RingHashCap env config to minRingSize so the requested size is not capped.
func highRingSizeServiceConfig(t *testing.T) string {
	t.Helper()
	testutils.SetEnvConfig(t, &envconfig.RingHashCap, minRingSize)

	return fmt.Sprintf(`{
	"loadBalancingConfig": [{"ring_hash_experimental":{
		"requestHashHeader": "address_hash",
		"minRingSize": %d,
		"maxRingSize": %d
	}
	}]}`, minRingSize, minRingSize)
}

// Tests that when a request hash key is set in the balancer configuration via
// service config, and the header is not set in the outgoing request, then it
// is sent to a random backend.
func (s) TestRingHash_RequestHashKeyRandom(t *testing.T) {
	testutils.SetEnvConfig(t, &envconfig.RingHashSetRequestHashKey, true)

	backends := backendAddrs(startTestServiceBackends(t, 4))

	// Create a clientConn with a manual resolver (which is used to push the
	// address of the test backend), and a default service config pointing to
	// the use of the ring_hash_experimental LB policy with an explicit hash
	// header. A large ring is used to lower the skew of the random
	// distribution asserted below.
	ringHashServiceConfig := highRingSizeServiceConfig(t)
	r := manual.NewBuilderWithScheme("whatever")
	dopts := []grpc.DialOption{
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithResolvers(r),
		grpc.WithDefaultServiceConfig(ringHashServiceConfig),
		grpc.WithConnectParams(fastConnectParams),
	}
	cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...)
	if err != nil {
		t.Fatalf("Failed to dial local test server: %v", err)
	}
	defer cc.Close()
	var endpoints []resolver.Endpoint
	for _, backend := range backends {
		endpoints = append(endpoints, resolver.Endpoint{
			Addresses: []resolver.Address{{Addr: backend}},
		})
	}
	r.UpdateState(resolver.State{
		Endpoints: endpoints,
	})
	client := testgrpc.NewTestServiceClient(cc)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Due to the way that ring hash lazily establishes connections when using a
	// random hash, request distribution is skewed towards the order in which we
	// connected. The test sends RPCs until we are connected to all backends, so
	// we can later assert that the distribution is uniform.
	seen := make(map[string]bool)
	for len(seen) != 4 {
		var remote peer.Peer
		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&remote)); err != nil {
			t.Fatalf("rpc EmptyCall() failed: %v", err)
		}
		seen[remote.String()] = true
	}

	// Make sure that requests without the hash header are spread uniformly at
	// random over the backends, within the configured error tolerance.
	const want = 1.0 / 4
	numRPCs := computeIdealNumberOfRPCs(t, want, errorTolerance)
	gotPerBackend := checkRPCSendOK(ctx, t, client, numRPCs)
	for _, backend := range backends {
		got := float64(gotPerBackend[backend]) / float64(numRPCs)
		if !cmp.Equal(got, want, cmpopts.EquateApprox(0, errorTolerance)) {
			t.Errorf("Fraction of RPCs to backend %s: got %v, want %v (margin: +-%v)", backend, got, want, errorTolerance)
		}
	}
}

// Tests that when a request hash key is set in the balancer configuration via
// service config, and the header is not set in the outgoing request (random
// behavior), then each RPC wakes up at most one SubChannel, and, if there are
// SubChannels in Ready state, RPCs are routed to them.
func (s) TestRingHash_RequestHashKeyConnecting(t *testing.T) {
	testutils.SetEnvConfig(t, &envconfig.RingHashSetRequestHashKey, true)

	backends := backendAddrs(startTestServiceBackends(t, 20))

	// Create a clientConn with a manual resolver (which is used to push the
	// address of the test backend), and a default service config pointing to
	// the use of the ring_hash_experimental LB policy with an explicit hash
	// header. Use a blocking dialer to control connection attempts.
	const ringHashServiceConfig = `{"loadBalancingConfig": [
		{"ring_hash_experimental":{"requestHashHeader":"address_hash"}}
	]}`
	r := manual.NewBuilderWithScheme("whatever")
	blockingDialer := testutils.NewBlockingDialer()
	dopts := []grpc.DialOption{
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithResolvers(r),
		grpc.WithDefaultServiceConfig(ringHashServiceConfig),
		grpc.WithConnectParams(fastConnectParams),
		grpc.WithContextDialer(blockingDialer.DialContext),
	}
	cc, err := grpc.NewClient(r.Scheme()+":///test.server", dopts...)
	if err != nil {
		t.Fatalf("Failed to dial local test server: %v", err)
	}
	defer cc.Close()
	var endpoints []resolver.Endpoint
	for _, backend := range backends {
		endpoints = append(endpoints, resolver.Endpoint{
			Addresses: []resolver.Address{{Addr: backend}},
		})
	}
	r.UpdateState(resolver.State{
		Endpoints: endpoints,
	})
	client := testgrpc.NewTestServiceClient(cc)

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Intercept all connection attempts to the backends.
	var holds []*testutils.Hold
	for i := 0; i < len(backends); i++ {
		holds = append(holds, blockingDialer.Hold(backends[i]))
	}

	wg := sync.WaitGroup{}
	wg.Add(1)
	go func() {
		// Send 1 RPC and make sure this triggers at most 1 connection attempt.
		// Use t.Errorf rather than t.Fatalf: FailNow must not be called from a
		// goroutine other than the one running the test.
		_, err := client.EmptyCall(ctx, &testpb.Empty{})
		if err != nil {
			t.Errorf("EmptyCall(): got %v, want success", err)
		}
		wg.Done()
	}()

	// Wait for at least one connection attempt by polling the holds.
	nConn := 0
	for nConn == 0 {
		if ctx.Err() != nil {
			t.Fatal("Test timed out waiting for a connection attempt")
		}
		time.Sleep(1 * time.Millisecond)
		for _, hold := range holds {
			if hold.IsStarted() {
				nConn++
			}
		}
	}
	if wantMaxConn := 1; nConn > wantMaxConn {
		t.Fatalf("Got %d connection attempts, want at most %d", nConn, wantMaxConn)
	}

	// Do a second RPC. Since there should already be a SubChannel in
	// Connecting state, this should not trigger a connection attempt.
	wg.Add(1)
	go func() {
		_, err := client.EmptyCall(ctx, &testpb.Empty{})
		if err != nil {
			t.Errorf("EmptyCall(): got %v, want success", err)
		}
		wg.Done()
	}()

	// Give extra time for more connections to be attempted.
	time.Sleep(defaultTestShortTimeout)

	var firstConnectedBackend string
	nConn = 0
	for i, hold := range holds {
		if hold.IsStarted() {
			// Unblock the connection attempt. The SubChannel (and hence the
			// channel) should transition to Ready. RPCs should succeed and
			// be routed to this backend. The hold slot is set to nil so that
			// later scans only count attempts to still-held backends.
			hold.Resume()
			holds[i] = nil
			firstConnectedBackend = backends[i]
			nConn++
		}
	}
	if wantMaxConn := 1; nConn > wantMaxConn {
		t.Fatalf("Got %d connection attempts, want at most %d", nConn, wantMaxConn)
	}
	testutils.AwaitState(ctx, t, cc, connectivity.Ready)
	wg.Wait() // Make sure we're done with the 2 previous RPCs.

	// Now send RPCs until we have at least one more connection attempt, that
	// is, the random hash did not land on the same backend on every pick (the
	// chances are low, but we don't want this to be flaky). Make sure no RPC
	// fails and that we route all of them to the only subchannel in ready
	// state.
	nConn = 0
	for nConn == 0 {
		p := peer.Peer{}
		_, err = client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&p))
		if status.Code(err) == codes.DeadlineExceeded {
			t.Fatal("EmptyCall(): test timed out while waiting for more connection attempts")
		}
		if err != nil {
			t.Fatalf("EmptyCall(): got %v, want success", err)
		}
		if p.Addr.String() != firstConnectedBackend {
			t.Errorf("RPC sent to backend %q, want %q", p.Addr.String(), firstConnectedBackend)
		}
		// Count new attempts only on the holds that were never resumed.
		for _, hold := range holds {
			if hold != nil && hold.IsStarted() {
				nConn++
			}
		}
	}
}