google.golang.org/grpc@v1.72.2/xds/internal/balancer/clusterresolver/e2e_test/aggregate_cluster_test.go (about)

     1  /*
     2   * Copyright 2023 gRPC authors.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package e2e_test
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"net"
    23  	"sort"
    24  	"strconv"
    25  	"strings"
    26  	"testing"
    27  	"time"
    28  
    29  	"github.com/google/go-cmp/cmp"
    30  	"github.com/google/uuid"
    31  	"google.golang.org/grpc"
    32  	"google.golang.org/grpc/codes"
    33  	"google.golang.org/grpc/connectivity"
    34  	"google.golang.org/grpc/credentials/insecure"
    35  	"google.golang.org/grpc/internal"
    36  	"google.golang.org/grpc/internal/stubserver"
    37  	"google.golang.org/grpc/internal/testutils/pickfirst"
    38  	"google.golang.org/grpc/internal/testutils/xds/e2e"
    39  	"google.golang.org/grpc/internal/xds/bootstrap"
    40  	"google.golang.org/grpc/peer"
    41  	"google.golang.org/grpc/resolver"
    42  	"google.golang.org/grpc/resolver/manual"
    43  	"google.golang.org/grpc/serviceconfig"
    44  	"google.golang.org/grpc/status"
    45  	"google.golang.org/grpc/xds/internal/xdsclient"
    46  	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version"
    47  	"google.golang.org/protobuf/types/known/wrapperspb"
    48  
    49  	v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
    50  	v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
    51  	v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
    52  	testgrpc "google.golang.org/grpc/interop/grpc_testing"
    53  	testpb "google.golang.org/grpc/interop/grpc_testing"
    54  )
    55  
    56  // makeAggregateClusterResource returns an aggregate cluster resource with the
    57  // given name and list of child names.
    58  func makeAggregateClusterResource(name string, childNames []string) *v3clusterpb.Cluster {
    59  	return e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
    60  		ClusterName: name,
    61  		Type:        e2e.ClusterTypeAggregate,
    62  		ChildNames:  childNames,
    63  	})
    64  }
    65  
    66  // makeLogicalDNSClusterResource returns a LOGICAL_DNS cluster resource with the
    67  // given name and given DNS host and port.
    68  func makeLogicalDNSClusterResource(name, dnsHost string, dnsPort uint32) *v3clusterpb.Cluster {
    69  	return e2e.ClusterResourceWithOptions(e2e.ClusterOptions{
    70  		ClusterName: name,
    71  		Type:        e2e.ClusterTypeLogicalDNS,
    72  		DNSHostName: dnsHost,
    73  		DNSPort:     dnsPort,
    74  	})
    75  }
    76  
    77  // setupDNS unregisters the DNS resolver and registers a manual resolver for the
    78  // same scheme. This allows the test to mock the DNS resolution by supplying the
    79  // addresses of the test backends.
    80  //
    81  // Returns the following:
    82  //   - a channel onto which the DNS target being resolved is written to by the
    83  //     mock DNS resolver
    84  //   - a manual resolver which is used to mock the actual DNS resolution
    85  func setupDNS(t *testing.T) (chan resolver.Target, *manual.Resolver) {
    86  	targetCh := make(chan resolver.Target, 1)
    87  
    88  	mr := manual.NewBuilderWithScheme("dns")
    89  	mr.BuildCallback = func(target resolver.Target, _ resolver.ClientConn, _ resolver.BuildOptions) { targetCh <- target }
    90  
    91  	dnsResolverBuilder := resolver.Get("dns")
    92  	resolver.Register(mr)
    93  
    94  	t.Cleanup(func() { resolver.Register(dnsResolverBuilder) })
    95  	return targetCh, mr
    96  }
    97  
    98  // TestAggregateCluster_WithTwoEDSClusters tests the case where the top-level
    99  // cluster resource is an aggregate cluster. It verifies that RPCs fail when the
   100  // management server has not responded to all requested EDS resources, and also
   101  // that RPCs are routed to the highest priority cluster once all requested EDS
   102  // resources have been sent by the management server.
   103  func (s) TestAggregateCluster_WithTwoEDSClusters(t *testing.T) {
   104  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   105  	defer cancel()
   106  
   107  	// Start an xDS management server that pushes the EDS resource names onto a
   108  	// channel when requested.
   109  	edsResourceNameCh := make(chan []string, 1)
   110  	managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{
   111  		OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error {
   112  			if req.GetTypeUrl() != version.V3EndpointsURL {
   113  				return nil
   114  			}
   115  			if len(req.GetResourceNames()) == 0 {
   116  				// This happens at the end of the test when the grpc channel is
   117  				// being shut down and it is no longer interested in xDS
   118  				// resources.
   119  				return nil
   120  			}
   121  			select {
   122  			case edsResourceNameCh <- req.GetResourceNames():
   123  			case <-ctx.Done():
   124  			}
   125  			return nil
   126  		},
   127  		AllowResourceSubset: true,
   128  	})
   129  
   130  	// Create bootstrap configuration pointing to the above management server.
   131  	nodeID := uuid.New().String()
   132  	bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address)
   133  
   134  	// Start two test backends and extract their host and port. The first
   135  	// backend belongs to EDS cluster "cluster-1", while the second backend
   136  	// belongs to EDS cluster "cluster-2".
   137  	servers, cleanup2 := startTestServiceBackends(t, 2)
   138  	defer cleanup2()
   139  	addrs, ports := backendAddressesAndPorts(t, servers)
   140  
   141  	// Configure an aggregate cluster, two EDS clusters and only one endpoints
   142  	// resource (corresponding to the first EDS cluster) in the management
   143  	// server.
   144  	const clusterName1 = clusterName + "-cluster-1"
   145  	const clusterName2 = clusterName + "-cluster-2"
   146  	resources := e2e.UpdateOptions{
   147  		NodeID: nodeID,
   148  		Clusters: []*v3clusterpb.Cluster{
   149  			makeAggregateClusterResource(clusterName, []string{clusterName1, clusterName2}),
   150  			e2e.DefaultCluster(clusterName1, "", e2e.SecurityLevelNone),
   151  			e2e.DefaultCluster(clusterName2, "", e2e.SecurityLevelNone),
   152  		},
   153  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(clusterName1, "localhost", []uint32{uint32(ports[0])})},
   154  		SkipValidation: true,
   155  	}
   156  	if err := managementServer.Update(ctx, resources); err != nil {
   157  		t.Fatal(err)
   158  	}
   159  
   160  	// Create xDS client, configure cds_experimental LB policy with a manual
   161  	// resolver, and dial the test backends.
   162  	cc, cleanup := setupAndDial(t, bootstrapContents)
   163  	defer cleanup()
   164  
   165  	// Wait for both EDS resources to be requested.
   166  	func() {
   167  		for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) {
   168  			select {
   169  			case names := <-edsResourceNameCh:
   170  				// Copy and sort the sortedNames to avoid racing with an
   171  				// OnStreamRequest call.
   172  				sortedNames := make([]string, len(names))
   173  				copy(sortedNames, names)
   174  				sort.Strings(sortedNames)
   175  				if cmp.Equal(sortedNames, []string{clusterName1, clusterName2}) {
   176  					return
   177  				}
   178  			default:
   179  			}
   180  		}
   181  	}()
   182  	if ctx.Err() != nil {
   183  		t.Fatalf("Timeout when waiting for all EDS resources %v to be requested", []string{clusterName1, clusterName2})
   184  	}
   185  
   186  	// Make an RPC with a short deadline. We expect this RPC to not succeed
   187  	// because the management server has not responded with all EDS resources
   188  	// requested.
   189  	client := testgrpc.NewTestServiceClient(cc)
   190  	sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   191  	defer sCancel()
   192  	if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded {
   193  		t.Fatalf("EmptyCall() code %s, want %s", status.Code(err), codes.DeadlineExceeded)
   194  	}
   195  
   196  	// Update the management server with the second EDS resource.
   197  	resources.Endpoints = append(resources.Endpoints, e2e.DefaultEndpoint(clusterName2, "localhost", []uint32{uint32(ports[1])}))
   198  	if err := managementServer.Update(ctx, resources); err != nil {
   199  		t.Fatal(err)
   200  	}
   201  
   202  	// Make an RPC and ensure that it gets routed to cluster-1, implicitly
   203  	// higher priority than cluster-2.
   204  	peer := &peer.Peer{}
   205  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil {
   206  		t.Fatalf("EmptyCall() failed: %v", err)
   207  	}
   208  	if peer.Addr.String() != addrs[0].Addr {
   209  		t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[0].Addr)
   210  	}
   211  }
   212  
   213  // TestAggregateCluster_WithTwoEDSClusters_PrioritiesChange tests the case where
   214  // the top-level cluster resource is an aggregate cluster. It verifies that RPCs
   215  // are routed to the highest priority EDS cluster.
   216  func (s) TestAggregateCluster_WithTwoEDSClusters_PrioritiesChange(t *testing.T) {
   217  	// Start an xDS management server.
   218  	managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})
   219  
   220  	// Create bootstrap configuration pointing to the above management server.
   221  	nodeID := uuid.New().String()
   222  	bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address)
   223  
   224  	// Start two test backends and extract their host and port. The first
   225  	// backend belongs to EDS cluster "cluster-1", while the second backend
   226  	// belongs to EDS cluster "cluster-2".
   227  	servers, cleanup2 := startTestServiceBackends(t, 2)
   228  	defer cleanup2()
   229  	addrs, ports := backendAddressesAndPorts(t, servers)
   230  
   231  	// Configure an aggregate cluster, two EDS clusters and the corresponding
   232  	// endpoints resources in the management server.
   233  	const clusterName1 = clusterName + "cluster-1"
   234  	const clusterName2 = clusterName + "cluster-2"
   235  	resources := e2e.UpdateOptions{
   236  		NodeID: nodeID,
   237  		Clusters: []*v3clusterpb.Cluster{
   238  			makeAggregateClusterResource(clusterName, []string{clusterName1, clusterName2}),
   239  			e2e.DefaultCluster(clusterName1, "", e2e.SecurityLevelNone),
   240  			e2e.DefaultCluster(clusterName2, "", e2e.SecurityLevelNone),
   241  		},
   242  		Endpoints: []*v3endpointpb.ClusterLoadAssignment{
   243  			e2e.DefaultEndpoint(clusterName1, "localhost", []uint32{uint32(ports[0])}),
   244  			e2e.DefaultEndpoint(clusterName2, "localhost", []uint32{uint32(ports[1])}),
   245  		},
   246  		SkipValidation: true,
   247  	}
   248  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   249  	defer cancel()
   250  	if err := managementServer.Update(ctx, resources); err != nil {
   251  		t.Fatal(err)
   252  	}
   253  
   254  	// Create xDS client, configure cds_experimental LB policy with a manual
   255  	// resolver, and dial the test backends.
   256  	cc, cleanup := setupAndDial(t, bootstrapContents)
   257  	defer cleanup()
   258  
   259  	// Make an RPC and ensure that it gets routed to cluster-1, implicitly
   260  	// higher priority than cluster-2.
   261  	client := testgrpc.NewTestServiceClient(cc)
   262  	peer := &peer.Peer{}
   263  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil {
   264  		t.Fatalf("EmptyCall() failed: %v", err)
   265  	}
   266  	if peer.Addr.String() != addrs[0].Addr {
   267  		t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[0].Addr)
   268  	}
   269  
   270  	// Swap the priorities of the EDS clusters in the aggregate cluster.
   271  	resources.Clusters = []*v3clusterpb.Cluster{
   272  		makeAggregateClusterResource(clusterName, []string{clusterName2, clusterName1}),
   273  		e2e.DefaultCluster(clusterName1, "", e2e.SecurityLevelNone),
   274  		e2e.DefaultCluster(clusterName2, "", e2e.SecurityLevelNone),
   275  	}
   276  	if err := managementServer.Update(ctx, resources); err != nil {
   277  		t.Fatal(err)
   278  	}
   279  
   280  	// Wait for RPCs to get routed to cluster-2, which is now implicitly higher
   281  	// priority than cluster-1, after the priority switch above.
   282  	for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) {
   283  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil {
   284  			t.Fatalf("EmptyCall() failed: %v", err)
   285  		}
   286  		if peer.Addr.String() == addrs[1].Addr {
   287  			break
   288  		}
   289  	}
   290  	if ctx.Err() != nil {
   291  		t.Fatal("Timeout waiting for RPCs to be routed to cluster-2 after priority switch")
   292  	}
   293  }
   294  
   295  func hostAndPortFromAddress(t *testing.T, addr string) (string, uint32) {
   296  	t.Helper()
   297  
   298  	host, p, err := net.SplitHostPort(addr)
   299  	if err != nil {
   300  		t.Fatalf("Invalid serving address: %v", addr)
   301  	}
   302  	port, err := strconv.ParseUint(p, 10, 32)
   303  	if err != nil {
   304  		t.Fatalf("Invalid serving port %q: %v", p, err)
   305  	}
   306  	return host, uint32(port)
   307  }
   308  
   309  // TestAggregateCluster_WithOneDNSCluster tests the case where the top-level
   310  // cluster resource is an aggregate cluster that resolves to a single
   311  // LOGICAL_DNS cluster. The test verifies that RPCs can be made to backends that
   312  // make up the LOGICAL_DNS cluster.
   313  func (s) TestAggregateCluster_WithOneDNSCluster(t *testing.T) {
   314  	// Start an xDS management server.
   315  	managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})
   316  
   317  	// Create bootstrap configuration pointing to the above management server.
   318  	nodeID := uuid.New().String()
   319  	bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address)
   320  
   321  	// Start a test service backend.
   322  	server := stubserver.StartTestService(t, nil)
   323  	defer server.Stop()
   324  	host, port := hostAndPortFromAddress(t, server.Address)
   325  
   326  	// Configure an aggregate cluster pointing to a single LOGICAL_DNS cluster.
   327  	const dnsClusterName = clusterName + "-dns"
   328  	resources := e2e.UpdateOptions{
   329  		NodeID: nodeID,
   330  		Clusters: []*v3clusterpb.Cluster{
   331  			makeAggregateClusterResource(clusterName, []string{dnsClusterName}),
   332  			makeLogicalDNSClusterResource(dnsClusterName, host, uint32(port)),
   333  		},
   334  		SkipValidation: true,
   335  	}
   336  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   337  	defer cancel()
   338  	if err := managementServer.Update(ctx, resources); err != nil {
   339  		t.Fatal(err)
   340  	}
   341  
   342  	// Create xDS client, configure cds_experimental LB policy with a manual
   343  	// resolver, and dial the test backends.
   344  	cc, cleanup := setupAndDial(t, bootstrapContents)
   345  	defer cleanup()
   346  
   347  	// Make an RPC and ensure that it gets routed to the first backend since the
   348  	// child policy for a LOGICAL_DNS cluster is pick_first by default.
   349  	client := testgrpc.NewTestServiceClient(cc)
   350  	peer := &peer.Peer{}
   351  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil {
   352  		t.Fatalf("EmptyCall() failed: %v", err)
   353  	}
   354  	if peer.Addr.String() != server.Address {
   355  		t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, server.Address)
   356  	}
   357  }
   358  
   359  // Tests the case where the top-level cluster resource is an aggregate cluster
   360  // that resolves to a single LOGICAL_DNS cluster. The specified dns hostname is
   361  // expected to fail url parsing. The test verifies that the channel moves to
   362  // TRANSIENT_FAILURE.
   363  func (s) TestAggregateCluster_WithOneDNSCluster_ParseFailure(t *testing.T) {
   364  	// Start an xDS management server.
   365  	managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})
   366  
   367  	// Create bootstrap configuration pointing to the above management server.
   368  	nodeID := uuid.New().String()
   369  	bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address)
   370  
   371  	// Configure an aggregate cluster pointing to a single LOGICAL_DNS cluster.
   372  	const dnsClusterName = clusterName + "-dns"
   373  	resources := e2e.UpdateOptions{
   374  		NodeID: nodeID,
   375  		Clusters: []*v3clusterpb.Cluster{
   376  			makeAggregateClusterResource(clusterName, []string{dnsClusterName}),
   377  			makeLogicalDNSClusterResource(dnsClusterName, "%gh&%ij", uint32(8080)),
   378  		},
   379  		SkipValidation: true,
   380  	}
   381  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   382  	defer cancel()
   383  	if err := managementServer.Update(ctx, resources); err != nil {
   384  		t.Fatal(err)
   385  	}
   386  
   387  	// Create xDS client, configure cds_experimental LB policy with a manual
   388  	// resolver, and dial the test backends.
   389  	cc, cleanup := setupAndDial(t, bootstrapContents)
   390  	defer cleanup()
   391  
   392  	// Ensure that the ClientConn moves to TransientFailure.
   393  	for state := cc.GetState(); state != connectivity.TransientFailure; state = cc.GetState() {
   394  		if !cc.WaitForStateChange(ctx, state) {
   395  			t.Fatalf("Timed out waiting for state change. got %v; want %v", state, connectivity.TransientFailure)
   396  		}
   397  	}
   398  }
   399  
   400  // Tests the case where the top-level cluster resource is an aggregate cluster
   401  // that resolves to a single LOGICAL_DNS cluster. The test verifies that RPCs
   402  // can be made to backends that make up the LOGICAL_DNS cluster. The hostname of
   403  // the LOGICAL_DNS cluster is updated, and the test verifies that RPCs can be
   404  // made to backends that the new hostname resolves to.
   405  func (s) TestAggregateCluster_WithOneDNSCluster_HostnameChange(t *testing.T) {
   406  	// Start an xDS management server.
   407  	managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})
   408  
   409  	// Create bootstrap configuration pointing to the above management server.
   410  	nodeID := uuid.New().String()
   411  	bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address)
   412  
   413  	// Start two test backends and extract their host and port. The first
   414  	// backend is used initially for the LOGICAL_DNS cluster and an update
   415  	// switches the cluster to use the second backend.
   416  	servers, cleanup2 := startTestServiceBackends(t, 2)
   417  	defer cleanup2()
   418  
   419  	// Configure an aggregate cluster pointing to a single LOGICAL_DNS cluster.
   420  	const dnsClusterName = clusterName + "-dns"
   421  	dnsHostName, dnsPort := hostAndPortFromAddress(t, servers[0].Address)
   422  	resources := e2e.UpdateOptions{
   423  		NodeID: nodeID,
   424  		Clusters: []*v3clusterpb.Cluster{
   425  			makeAggregateClusterResource(clusterName, []string{dnsClusterName}),
   426  			makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
   427  		},
   428  		SkipValidation: true,
   429  	}
   430  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   431  	defer cancel()
   432  	if err := managementServer.Update(ctx, resources); err != nil {
   433  		t.Fatal(err)
   434  	}
   435  
   436  	// Create xDS client, configure cds_experimental LB policy with a manual
   437  	// resolver, and dial the test backends.
   438  	cc, cleanup := setupAndDial(t, bootstrapContents)
   439  	defer cleanup()
   440  
   441  	// Make an RPC and ensure that it gets routed to the first backend since the
   442  	// child policy for a LOGICAL_DNS cluster is pick_first by default.
   443  	client := testgrpc.NewTestServiceClient(cc)
   444  	peer := &peer.Peer{}
   445  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil {
   446  		t.Fatalf("EmptyCall() failed: %v", err)
   447  	}
   448  	if peer.Addr.String() != servers[0].Address {
   449  		t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, servers[0].Address)
   450  	}
   451  
   452  	// Update the LOGICAL_DNS cluster's hostname to point to the second backend.
   453  	dnsHostName, dnsPort = hostAndPortFromAddress(t, servers[1].Address)
   454  	resources = e2e.UpdateOptions{
   455  		NodeID: nodeID,
   456  		Clusters: []*v3clusterpb.Cluster{
   457  			makeAggregateClusterResource(clusterName, []string{dnsClusterName}),
   458  			makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
   459  		},
   460  		SkipValidation: true,
   461  	}
   462  	if err := managementServer.Update(ctx, resources); err != nil {
   463  		t.Fatal(err)
   464  	}
   465  
   466  	// Ensure that traffic moves to the second backend eventually.
   467  	for ctx.Err() == nil {
   468  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer)); err != nil {
   469  			t.Fatalf("EmptyCall() failed: %v", err)
   470  		}
   471  		if peer.Addr.String() == servers[1].Address {
   472  			break
   473  		}
   474  	}
   475  	if ctx.Err() != nil {
   476  		t.Fatal("Timeout when waiting for RPCs to switch to the second backend")
   477  	}
   478  }
   479  
   480  // TestAggregateCluster_WithEDSAndDNS tests the case where the top-level cluster
   481  // resource is an aggregate cluster that resolves to an EDS and a LOGICAL_DNS
   482  // cluster. The test verifies that RPCs fail until both clusters are resolved to
   483  // endpoints, and RPCs are routed to the higher priority EDS cluster.
   484  func (s) TestAggregateCluster_WithEDSAndDNS(t *testing.T) {
   485  	dnsTargetCh, dnsR := setupDNS(t)
   486  
   487  	// Start an xDS management server that pushes the name of the requested EDS
   488  	// resource onto a channel.
   489  	edsResourceCh := make(chan string, 1)
   490  	managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{
   491  		OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error {
   492  			if req.GetTypeUrl() != version.V3EndpointsURL {
   493  				return nil
   494  			}
   495  			if len(req.GetResourceNames()) == 0 {
   496  				// This happens at the end of the test when the grpc channel is
   497  				// being shut down and it is no longer interested in xDS
   498  				// resources.
   499  				return nil
   500  			}
   501  			select {
   502  			case edsResourceCh <- req.GetResourceNames()[0]:
   503  			default:
   504  			}
   505  			return nil
   506  		},
   507  		AllowResourceSubset: true,
   508  	})
   509  
   510  	// Create bootstrap configuration pointing to the above management server.
   511  	nodeID := uuid.New().String()
   512  	bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address)
   513  
   514  	// Start two test backends and extract their host and port. The first
   515  	// backend is used for the EDS cluster and the second backend is used for
   516  	// the LOGICAL_DNS cluster.
   517  	servers, cleanup3 := startTestServiceBackends(t, 2)
   518  	defer cleanup3()
   519  	addrs, ports := backendAddressesAndPorts(t, servers)
   520  
   521  	// Configure an aggregate cluster pointing to an EDS and DNS cluster. Also
   522  	// configure an endpoints resource for the EDS cluster.
   523  	const (
   524  		edsClusterName = clusterName + "-eds"
   525  		dnsClusterName = clusterName + "-dns"
   526  		dnsHostName    = "dns_host"
   527  		dnsPort        = uint32(8080)
   528  	)
   529  	resources := e2e.UpdateOptions{
   530  		NodeID: nodeID,
   531  		Clusters: []*v3clusterpb.Cluster{
   532  			makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}),
   533  			e2e.DefaultCluster(edsClusterName, "", e2e.SecurityLevelNone),
   534  			makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
   535  		},
   536  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(edsClusterName, "localhost", []uint32{uint32(ports[0])})},
   537  		SkipValidation: true,
   538  	}
   539  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   540  	defer cancel()
   541  	if err := managementServer.Update(ctx, resources); err != nil {
   542  		t.Fatal(err)
   543  	}
   544  
   545  	// Create xDS client, configure cds_experimental LB policy with a manual
   546  	// resolver, and dial the test backends.
   547  	cc, cleanup := setupAndDial(t, bootstrapContents)
   548  	defer cleanup()
   549  
   550  	// Ensure that an EDS request is sent for the expected resource name.
   551  	select {
   552  	case <-ctx.Done():
   553  		t.Fatal("Timeout when waiting for EDS request to be received on the management server")
   554  	case name := <-edsResourceCh:
   555  		if name != edsClusterName {
   556  			t.Fatalf("Received EDS request with resource name %q, want %q", name, edsClusterName)
   557  		}
   558  	}
   559  
   560  	// Ensure that the DNS resolver is started for the expected target.
   561  	select {
   562  	case <-ctx.Done():
   563  		t.Fatal("Timeout when waiting for DNS resolver to be started")
   564  	case target := <-dnsTargetCh:
   565  		got, want := target.Endpoint(), fmt.Sprintf("%s:%d", dnsHostName, dnsPort)
   566  		if got != want {
   567  			t.Fatalf("DNS resolution started for target %q, want %q", got, want)
   568  		}
   569  	}
   570  
   571  	// Make an RPC with a short deadline. We expect this RPC to not succeed
   572  	// because the DNS resolver has not responded with endpoint addresses.
   573  	client := testgrpc.NewTestServiceClient(cc)
   574  	sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   575  	defer sCancel()
   576  	if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded {
   577  		t.Fatalf("EmptyCall() code %s, want %s", status.Code(err), codes.DeadlineExceeded)
   578  	}
   579  
   580  	// Update DNS resolver with test backend addresses.
   581  	dnsR.UpdateState(resolver.State{Addresses: addrs[1:]})
   582  
   583  	// Make an RPC and ensure that it gets routed to the first backend since the
   584  	// EDS cluster is of higher priority than the LOGICAL_DNS cluster.
   585  	peer := &peer.Peer{}
   586  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil {
   587  		t.Fatalf("EmptyCall() failed: %v", err)
   588  	}
   589  	if peer.Addr.String() != addrs[0].Addr {
   590  		t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[0].Addr)
   591  	}
   592  }
   593  
   594  // TestAggregateCluster_SwitchEDSAndDNS tests the case where the top-level
   595  // cluster resource is an aggregate cluster. It initially resolves to a single
   596  // EDS cluster. The test verifies that RPCs are routed to backends in the EDS
   597  // cluster. Subsequently, the aggregate cluster resolves to a single DNS
   598  // cluster. The test verifies that RPCs are successful, this time to backends in
   599  // the DNS cluster.
   600  func (s) TestAggregateCluster_SwitchEDSAndDNS(t *testing.T) {
   601  	// Start an xDS management server.
   602  	managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})
   603  
   604  	// Create bootstrap configuration pointing to the above management server.
   605  	nodeID := uuid.New().String()
   606  	bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address)
   607  
   608  	// Start two test backends and extract their host and port. The first
   609  	// backend is used for the EDS cluster and the second backend is used for
   610  	// the LOGICAL_DNS cluster.
   611  	servers, cleanup3 := startTestServiceBackends(t, 2)
   612  	defer cleanup3()
   613  	addrs, ports := backendAddressesAndPorts(t, servers)
   614  	dnsHostName, dnsPort := hostAndPortFromAddress(t, addrs[1].Addr)
   615  
   616  	// Configure an aggregate cluster pointing to a single EDS cluster. Also,
   617  	// configure the underlying EDS cluster (and the corresponding endpoints
   618  	// resource) and DNS cluster (will be used later in the test).
   619  	const dnsClusterName = clusterName + "-dns"
   620  	resources := e2e.UpdateOptions{
   621  		NodeID: nodeID,
   622  		Clusters: []*v3clusterpb.Cluster{
   623  			makeAggregateClusterResource(clusterName, []string{edsServiceName}),
   624  			e2e.DefaultCluster(edsServiceName, "", e2e.SecurityLevelNone),
   625  			makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
   626  		},
   627  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(edsServiceName, "localhost", []uint32{uint32(ports[0])})},
   628  		SkipValidation: true,
   629  	}
   630  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   631  	defer cancel()
   632  	if err := managementServer.Update(ctx, resources); err != nil {
   633  		t.Fatal(err)
   634  	}
   635  
   636  	// Create xDS client, configure cds_experimental LB policy with a manual
   637  	// resolver, and dial the test backends.
   638  	cc, cleanup := setupAndDial(t, bootstrapContents)
   639  	defer cleanup()
   640  
   641  	// Ensure that the RPC is routed to the appropriate backend.
   642  	client := testgrpc.NewTestServiceClient(cc)
   643  	peer := &peer.Peer{}
   644  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil {
   645  		t.Fatalf("EmptyCall() failed: %v", err)
   646  	}
   647  	if peer.Addr.String() != addrs[0].Addr {
   648  		t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[0].Addr)
   649  	}
   650  
   651  	// Update the aggregate cluster to point to a single DNS cluster.
   652  	resources.Clusters = []*v3clusterpb.Cluster{
   653  		makeAggregateClusterResource(clusterName, []string{dnsClusterName}),
   654  		e2e.DefaultCluster(edsServiceName, "", e2e.SecurityLevelNone),
   655  		makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
   656  	}
   657  	if err := managementServer.Update(ctx, resources); err != nil {
   658  		t.Fatal(err)
   659  	}
   660  
   661  	// Ensure that start getting routed to the backend corresponding to the
   662  	// LOGICAL_DNS cluster.
   663  	for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) {
   664  		client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer))
   665  		if peer.Addr.String() == addrs[1].Addr {
   666  			break
   667  		}
   668  	}
   669  	if ctx.Err() != nil {
   670  		t.Fatalf("Timeout when waiting for RPCs to be routed to backend %q in the DNS cluster", addrs[1].Addr)
   671  	}
   672  }
   673  
   674  // TestAggregateCluster_BadEDS_GoodToBadDNS tests the case where the top-level
   675  // cluster is an aggregate cluster that resolves to an EDS and LOGICAL_DNS
   676  // cluster. The test first asserts that no RPCs can be made after receiving an
   677  // EDS response with zero endpoints because no update has been received from the
   678  // DNS resolver yet. Once the DNS resolver pushes an update, the test verifies
   679  // that we switch to the DNS cluster and can make a successful RPC. At this
   680  // point when the DNS cluster returns an error, the test verifies that RPCs are
   681  // still successful. This is the expected behavior because the cluster resolver
   682  // policy eats errors from DNS Resolver after it has returned an error.
   683  func (s) TestAggregateCluster_BadEDS_GoodToBadDNS(t *testing.T) {
   684  	dnsTargetCh, dnsR := setupDNS(t)
   685  
   686  	// Start an xDS management server.
   687  	managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})
   688  
   689  	// Create bootstrap configuration pointing to the above management server.
   690  	nodeID := uuid.New().String()
   691  	bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address)
   692  
   693  	// Start two test backends.
   694  	servers, cleanup3 := startTestServiceBackends(t, 2)
   695  	defer cleanup3()
   696  	addrs, _ := backendAddressesAndPorts(t, servers)
   697  
   698  	// Configure an aggregate cluster pointing to an EDS and LOGICAL_DNS
   699  	// cluster. Also configure an endpoints resource for the EDS cluster which
   700  	// triggers a NACK.
   701  	const (
   702  		edsClusterName = clusterName + "-eds"
   703  		dnsClusterName = clusterName + "-dns"
   704  		dnsHostName    = "dns_host"
   705  		dnsPort        = uint32(8080)
   706  	)
   707  	emptyEndpointResource := e2e.DefaultEndpoint(edsServiceName, "localhost", nil)
   708  	resources := e2e.UpdateOptions{
   709  		NodeID: nodeID,
   710  		Clusters: []*v3clusterpb.Cluster{
   711  			makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}),
   712  			e2e.DefaultCluster(edsClusterName, edsServiceName, e2e.SecurityLevelNone),
   713  			makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
   714  		},
   715  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{emptyEndpointResource},
   716  		SkipValidation: true,
   717  	}
   718  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   719  	defer cancel()
   720  	if err := managementServer.Update(ctx, resources); err != nil {
   721  		t.Fatal(err)
   722  	}
   723  
   724  	// Create xDS client, configure cds_experimental LB policy with a manual
   725  	// resolver, and dial the test backends.
   726  	cc, cleanup := setupAndDial(t, bootstrapContents)
   727  	defer cleanup()
   728  
   729  	// Make an RPC with a short deadline. We expect this RPC to not succeed
   730  	// because the EDS resource came back with no endpoints, and we are yet to
   731  	// push an update through the DNS resolver.
   732  	client := testgrpc.NewTestServiceClient(cc)
   733  	sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   734  	defer sCancel()
   735  	if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded {
   736  		t.Fatalf("EmptyCall() code %s, want %s", status.Code(err), codes.DeadlineExceeded)
   737  	}
   738  
   739  	// Ensure that the DNS resolver is started for the expected target.
   740  	select {
   741  	case <-ctx.Done():
   742  		t.Fatal("Timeout when waiting for DNS resolver to be started")
   743  	case target := <-dnsTargetCh:
   744  		got, want := target.Endpoint(), fmt.Sprintf("%s:%d", dnsHostName, dnsPort)
   745  		if got != want {
   746  			t.Fatalf("DNS resolution started for target %q, want %q", got, want)
   747  		}
   748  	}
   749  
   750  	// Update DNS resolver with test backend addresses.
   751  	dnsR.UpdateState(resolver.State{Addresses: addrs})
   752  
   753  	// Ensure that RPCs start getting routed to the first backend since the
   754  	// child policy for a LOGICAL_DNS cluster is pick_first by default.
   755  	for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) {
   756  		peer := &peer.Peer{}
   757  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer)); err != nil {
   758  			t.Logf("EmptyCall() failed: %v", err)
   759  			continue
   760  		}
   761  		if peer.Addr.String() == addrs[0].Addr {
   762  			break
   763  		}
   764  	}
   765  	if ctx.Err() != nil {
   766  		t.Fatalf("Timeout when waiting for RPCs to be routed to backend %q in the DNS cluster", addrs[0].Addr)
   767  	}
   768  
   769  	// Push an error from the DNS resolver as well.
   770  	dnsErr := fmt.Errorf("DNS error")
   771  	dnsR.CC().ReportError(dnsErr)
   772  
   773  	// Ensure that RPCs continue to succeed for the next second.
   774  	for end := time.Now().Add(time.Second); time.Now().Before(end); <-time.After(defaultTestShortTimeout) {
   775  		peer := &peer.Peer{}
   776  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer)); err != nil {
   777  			t.Fatalf("EmptyCall() failed: %v", err)
   778  		}
   779  		if peer.Addr.String() != addrs[0].Addr {
   780  			t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[0].Addr)
   781  		}
   782  	}
   783  }
   784  
   785  // TestAggregateCluster_BadEDS_GoodToBadDNS tests the case where the top-level
   786  // cluster is an aggregate cluster that resolves to an EDS and LOGICAL_DNS
   787  // cluster. The test first sends an EDS response which triggers an NACK. Once
   788  // the DNS resolver pushes an update, the test verifies that we switch to the
   789  // DNS cluster and can make a successful RPC.
   790  func (s) TestAggregateCluster_BadEDSFromError_GoodToBadDNS(t *testing.T) {
   791  	// Start an xDS management server.
   792  	managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})
   793  
   794  	// Create bootstrap configuration pointing to the above management server.
   795  	nodeID := uuid.New().String()
   796  	bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address)
   797  
   798  	// Start a test service backend.
   799  	server := stubserver.StartTestService(t, nil)
   800  	defer server.Stop()
   801  	dnsHostName, dnsPort := hostAndPortFromAddress(t, server.Address)
   802  
   803  	// Configure an aggregate cluster pointing to an EDS and LOGICAL_DNS
   804  	// cluster. Also configure an empty endpoints resource for the EDS cluster
   805  	// that contains no endpoints.
   806  	const (
   807  		edsClusterName = clusterName + "-eds"
   808  		dnsClusterName = clusterName + "-dns"
   809  	)
   810  	nackEndpointResource := e2e.DefaultEndpoint(edsServiceName, "localhost", nil)
   811  	nackEndpointResource.Endpoints = []*v3endpointpb.LocalityLbEndpoints{
   812  		{
   813  			LoadBalancingWeight: &wrapperspb.UInt32Value{
   814  				Value: 0, // causes an NACK
   815  			},
   816  		},
   817  	}
   818  	resources := e2e.UpdateOptions{
   819  		NodeID: nodeID,
   820  		Clusters: []*v3clusterpb.Cluster{
   821  			makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}),
   822  			e2e.DefaultCluster(edsClusterName, edsServiceName, e2e.SecurityLevelNone),
   823  			makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
   824  		},
   825  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{nackEndpointResource},
   826  		SkipValidation: true,
   827  	}
   828  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   829  	defer cancel()
   830  	if err := managementServer.Update(ctx, resources); err != nil {
   831  		t.Fatal(err)
   832  	}
   833  
   834  	// Create xDS client, configure cds_experimental LB policy with a manual
   835  	// resolver, and dial the test backends.
   836  	cc, cleanup := setupAndDial(t, bootstrapContents)
   837  	defer cleanup()
   838  
   839  	// Ensure that RPCs start getting routed to the first backend since the
   840  	// child policy for a LOGICAL_DNS cluster is pick_first by default.
   841  	pickfirst.CheckRPCsToBackend(ctx, cc, resolver.Address{Addr: server.Address})
   842  }
   843  
   844  // TestAggregateCluster_BadDNS_GoodEDS tests the case where the top-level
   845  // cluster is an aggregate cluster that resolves to an LOGICAL_DNS and EDS
   846  // cluster. When the DNS Resolver returns an error and EDS cluster returns a
   847  // good update, this test verifies the cluster_resolver balancer correctly falls
   848  // back from the LOGICAL_DNS cluster to the EDS cluster.
   849  func (s) TestAggregateCluster_BadDNS_GoodEDS(t *testing.T) {
   850  	dnsTargetCh, dnsR := setupDNS(t)
   851  	// Start an xDS management server.
   852  	managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})
   853  
   854  	// Create bootstrap configuration pointing to the above management server.
   855  	nodeID := uuid.New().String()
   856  	bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address)
   857  
   858  	// Start a test service backend.
   859  	server := stubserver.StartTestService(t, nil)
   860  	defer server.Stop()
   861  	_, edsPort := hostAndPortFromAddress(t, server.Address)
   862  
   863  	// Configure an aggregate cluster pointing to an LOGICAL_DNS and EDS
   864  	// cluster. Also configure an endpoints resource for the EDS cluster.
   865  	const (
   866  		edsClusterName = clusterName + "-eds"
   867  		dnsClusterName = clusterName + "-dns"
   868  		dnsHostName    = "bad.ip.v4.address"
   869  		dnsPort        = 8080
   870  	)
   871  	resources := e2e.UpdateOptions{
   872  		NodeID: nodeID,
   873  		Clusters: []*v3clusterpb.Cluster{
   874  			makeAggregateClusterResource(clusterName, []string{dnsClusterName, edsClusterName}),
   875  			makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
   876  			e2e.DefaultCluster(edsClusterName, edsServiceName, e2e.SecurityLevelNone),
   877  		},
   878  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(edsServiceName, "localhost", []uint32{uint32(edsPort)})},
   879  		SkipValidation: true,
   880  	}
   881  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   882  	defer cancel()
   883  	if err := managementServer.Update(ctx, resources); err != nil {
   884  		t.Fatal(err)
   885  	}
   886  
   887  	// Create xDS client, configure cds_experimental LB policy with a manual
   888  	// resolver, and dial the test backends.
   889  	cc, cleanup := setupAndDial(t, bootstrapContents)
   890  	defer cleanup()
   891  
   892  	// Ensure that the DNS resolver is started for the expected target.
   893  	select {
   894  	case <-ctx.Done():
   895  		t.Fatal("Timeout when waiting for DNS resolver to be started")
   896  	case target := <-dnsTargetCh:
   897  		got, want := target.Endpoint(), fmt.Sprintf("%s:%d", dnsHostName, dnsPort)
   898  		if got != want {
   899  			t.Fatalf("DNS resolution started for target %q, want %q", got, want)
   900  		}
   901  	}
   902  
   903  	// Produce a bad resolver update from the DNS resolver.
   904  	dnsErr := fmt.Errorf("DNS error")
   905  	dnsR.CC().ReportError(dnsErr)
   906  
   907  	// RPCs should work, higher level DNS cluster errors so should fallback to
   908  	// EDS cluster.
   909  	client := testgrpc.NewTestServiceClient(cc)
   910  	peer := &peer.Peer{}
   911  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil {
   912  		t.Fatalf("EmptyCall() failed: %v", err)
   913  	}
   914  	if peer.Addr.String() != server.Address {
   915  		t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, server.Address)
   916  	}
   917  }
   918  
   919  // TestAggregateCluster_BadEDS_BadDNS tests the case where the top-level cluster
   920  // is an aggregate cluster that resolves to an EDS and LOGICAL_DNS cluster. When
   921  // the EDS request returns a resource that contains no endpoints, the test
   922  // verifies that we switch to the DNS cluster. When the DNS cluster returns an
   923  // error, the test verifies that RPCs fail with the error triggered by the DNS
   924  // Discovery Mechanism (from sending an empty address list down).
   925  func (s) TestAggregateCluster_BadEDS_BadDNS(t *testing.T) {
   926  	dnsTargetCh, dnsR := setupDNS(t)
   927  	// Start an xDS management server.
   928  	managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})
   929  
   930  	// Create bootstrap configuration pointing to the above management server.
   931  	nodeID := uuid.New().String()
   932  	bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address)
   933  
   934  	// Configure an aggregate cluster pointing to an EDS and LOGICAL_DNS
   935  	// cluster. Also configure an empty endpoints resource for the EDS cluster
   936  	// that contains no endpoints.
   937  	const (
   938  		edsClusterName = clusterName + "-eds"
   939  		dnsClusterName = clusterName + "-dns"
   940  		dnsHostName    = "bad.ip.v4.address"
   941  		dnsPort        = 8080
   942  	)
   943  	emptyEndpointResource := e2e.DefaultEndpoint(edsServiceName, "localhost", nil)
   944  	resources := e2e.UpdateOptions{
   945  		NodeID: nodeID,
   946  		Clusters: []*v3clusterpb.Cluster{
   947  			makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}),
   948  			e2e.DefaultCluster(edsClusterName, edsServiceName, e2e.SecurityLevelNone),
   949  			makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
   950  		},
   951  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{emptyEndpointResource},
   952  		SkipValidation: true,
   953  	}
   954  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   955  	defer cancel()
   956  	if err := managementServer.Update(ctx, resources); err != nil {
   957  		t.Fatal(err)
   958  	}
   959  
   960  	// Create xDS client, configure cds_experimental LB policy with a manual
   961  	// resolver, and dial the test backends.
   962  	cc, cleanup := setupAndDial(t, bootstrapContents)
   963  	defer cleanup()
   964  
   965  	// Ensure that the DNS resolver is started for the expected target.
   966  	select {
   967  	case <-ctx.Done():
   968  		t.Fatal("Timeout when waiting for DNS resolver to be started")
   969  	case target := <-dnsTargetCh:
   970  		got, want := target.Endpoint(), fmt.Sprintf("%s:%d", dnsHostName, dnsPort)
   971  		if got != want {
   972  			t.Fatalf("DNS resolution started for target %q, want %q", got, want)
   973  		}
   974  	}
   975  
   976  	// Produce a bad resolver update from the DNS resolver.
   977  	dnsR.CC().ReportError(fmt.Errorf("DNS error"))
   978  
   979  	// Ensure that the error from the DNS Resolver leads to an empty address
   980  	// update for both priorities.
   981  	client := testgrpc.NewTestServiceClient(cc)
   982  	for ctx.Err() == nil {
   983  		_, err := client.EmptyCall(ctx, &testpb.Empty{})
   984  		if err == nil {
   985  			t.Fatal("EmptyCall() succeeded when expected to fail")
   986  		}
   987  		if status.Code(err) == codes.Unavailable && strings.Contains(err.Error(), "produced zero addresses") {
   988  			break
   989  		}
   990  	}
   991  	if ctx.Err() != nil {
   992  		t.Fatalf("Timeout when waiting for RPCs to fail with expected code and error")
   993  	}
   994  }
   995  
   996  // TestAggregateCluster_NoFallback_EDSNackedWithPreviousGoodUpdate tests the
   997  // scenario where the top-level cluster is an aggregate cluster that resolves to
   998  // an EDS and LOGICAL_DNS cluster. The management server first sends a good EDS
   999  // response for the EDS cluster and the test verifies that RPCs get routed to
  1000  // the EDS cluster. The management server then sends a bad EDS response. The
  1001  // test verifies that the cluster_resolver LB policy continues to use the
  1002  // previously received good update and that RPCs still get routed to the EDS
  1003  // cluster.
  1004  func (s) TestAggregateCluster_NoFallback_EDSNackedWithPreviousGoodUpdate(t *testing.T) {
  1005  	// Start an xDS management server.
  1006  	managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})
  1007  
  1008  	// Create bootstrap configuration pointing to the above management server.
  1009  	nodeID := uuid.New().String()
  1010  	bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address)
  1011  
  1012  	// Start two test backends and extract their host and port. The first
  1013  	// backend is used for the EDS cluster and the second backend is used for
  1014  	// the LOGICAL_DNS cluster.
  1015  	servers, cleanup3 := startTestServiceBackends(t, 2)
  1016  	defer cleanup3()
  1017  	addrs, ports := backendAddressesAndPorts(t, servers)
  1018  	dnsHostName, dnsPort := hostAndPortFromAddress(t, servers[1].Address)
  1019  
  1020  	// Configure an aggregate cluster pointing to an EDS and DNS cluster. Also
  1021  	// configure an endpoints resource for the EDS cluster.
  1022  	const (
  1023  		edsClusterName = clusterName + "-eds"
  1024  		dnsClusterName = clusterName + "-dns"
  1025  	)
  1026  	resources := e2e.UpdateOptions{
  1027  		NodeID: nodeID,
  1028  		Clusters: []*v3clusterpb.Cluster{
  1029  			makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}),
  1030  			e2e.DefaultCluster(edsClusterName, "", e2e.SecurityLevelNone),
  1031  			makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
  1032  		},
  1033  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(edsClusterName, "localhost", []uint32{uint32(ports[0])})},
  1034  		SkipValidation: true,
  1035  	}
  1036  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1037  	defer cancel()
  1038  	if err := managementServer.Update(ctx, resources); err != nil {
  1039  		t.Fatal(err)
  1040  	}
  1041  
  1042  	// Create xDS client, configure cds_experimental LB policy with a manual
  1043  	// resolver, and dial the test backends.
  1044  	cc, cleanup := setupAndDial(t, bootstrapContents)
  1045  	defer cleanup()
  1046  
  1047  	// Make an RPC and ensure that it gets routed to the first backend since the
  1048  	// EDS cluster is of higher priority than the LOGICAL_DNS cluster.
  1049  	client := testgrpc.NewTestServiceClient(cc)
  1050  	peer := &peer.Peer{}
  1051  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil {
  1052  		t.Fatalf("EmptyCall() failed: %v", err)
  1053  	}
  1054  	if peer.Addr.String() != addrs[0].Addr {
  1055  		t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[0].Addr)
  1056  	}
  1057  
  1058  	// Push an EDS resource from the management server that is expected to be
  1059  	// NACKed by the xDS client. Since the cluster_resolver LB policy has a
  1060  	// previously received good EDS resource, it will continue to use that.
  1061  	resources.Endpoints[0].Endpoints[0].LbEndpoints[0].LoadBalancingWeight = &wrapperspb.UInt32Value{Value: 0}
  1062  	if err := managementServer.Update(ctx, resources); err != nil {
  1063  		t.Fatal(err)
  1064  	}
  1065  
  1066  	// Ensure that RPCs continue to get routed to the EDS cluster for the next
  1067  	// second.
  1068  	for end := time.Now().Add(time.Second); time.Now().Before(end); <-time.After(defaultTestShortTimeout) {
  1069  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer)); err != nil {
  1070  			t.Fatalf("EmptyCall() failed: %v", err)
  1071  		}
  1072  		if peer.Addr.String() != addrs[0].Addr {
  1073  			t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[0].Addr)
  1074  		}
  1075  	}
  1076  }
  1077  
  1078  // TestAggregateCluster_Fallback_EDSNackedWithoutPreviousGoodUpdate tests the
  1079  // scenario where the top-level cluster is an aggregate cluster that resolves to
  1080  // an EDS and LOGICAL_DNS cluster.  The management server sends a bad EDS
  1081  // response. The test verifies that the cluster_resolver LB policy falls back to
  1082  // the LOGICAL_DNS cluster, because it is supposed to treat the bad EDS response
  1083  // as though it received an update with no endpoints.
  1084  func (s) TestAggregateCluster_Fallback_EDSNackedWithoutPreviousGoodUpdate(t *testing.T) {
  1085  	// Start an xDS management server.
  1086  	managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})
  1087  
  1088  	// Create bootstrap configuration pointing to the above management server.
  1089  	nodeID := uuid.New().String()
  1090  	bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address)
  1091  
  1092  	// Start two test backends and extract their host and port. The first
  1093  	// backend is used for the EDS cluster and the second backend is used for
  1094  	// the LOGICAL_DNS cluster.
  1095  	servers, cleanup3 := startTestServiceBackends(t, 2)
  1096  	defer cleanup3()
  1097  	addrs, ports := backendAddressesAndPorts(t, servers)
  1098  	dnsHostName, dnsPort := hostAndPortFromAddress(t, servers[1].Address)
  1099  
  1100  	// Configure an aggregate cluster pointing to an EDS and DNS cluster.
  1101  	const (
  1102  		edsClusterName = clusterName + "-eds"
  1103  		dnsClusterName = clusterName + "-dns"
  1104  	)
  1105  	resources := e2e.UpdateOptions{
  1106  		NodeID: nodeID,
  1107  		Clusters: []*v3clusterpb.Cluster{
  1108  			makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}),
  1109  			e2e.DefaultCluster(edsClusterName, "", e2e.SecurityLevelNone),
  1110  			makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
  1111  		},
  1112  		Endpoints:      []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(edsClusterName, "localhost", []uint32{uint32(ports[0])})},
  1113  		SkipValidation: true,
  1114  	}
  1115  
  1116  	// Set a load balancing weight of 0 for the backend in the EDS resource.
  1117  	// This is expected to be NACKed by the xDS client. Since the
  1118  	// cluster_resolver LB policy has no previously received good EDS resource,
  1119  	// it will treat this as though it received an update with no endpoints.
  1120  	resources.Endpoints[0].Endpoints[0].LbEndpoints[0].LoadBalancingWeight = &wrapperspb.UInt32Value{Value: 0}
  1121  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1122  	defer cancel()
  1123  	if err := managementServer.Update(ctx, resources); err != nil {
  1124  		t.Fatal(err)
  1125  	}
  1126  
  1127  	// Create xDS client, configure cds_experimental LB policy with a manual
  1128  	// resolver, and dial the test backends.
  1129  	cc, cleanup := setupAndDial(t, bootstrapContents)
  1130  	defer cleanup()
  1131  
  1132  	// Make an RPC and ensure that it gets routed to the LOGICAL_DNS cluster.
  1133  	peer := &peer.Peer{}
  1134  	client := testgrpc.NewTestServiceClient(cc)
  1135  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil {
  1136  		t.Fatalf("EmptyCall() failed: %v", err)
  1137  	}
  1138  	if peer.Addr.String() != addrs[1].Addr {
  1139  		t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, addrs[1].Addr)
  1140  	}
  1141  }
  1142  
  1143  // TestAggregateCluster_Fallback_EDS_ResourceNotFound tests the scenario where
  1144  // the top-level cluster is an aggregate cluster that resolves to an EDS and
  1145  // LOGICAL_DNS cluster.  The management server does not respond with the EDS
  1146  // cluster. The test verifies that the cluster_resolver LB policy falls back to
  1147  // the LOGICAL_DNS cluster in this case.
  1148  func (s) TestAggregateCluster_Fallback_EDS_ResourceNotFound(t *testing.T) {
  1149  	// Start an xDS management server.
  1150  	managementServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{AllowResourceSubset: true})
  1151  
  1152  	// Create bootstrap configuration pointing to the above management server.
  1153  	nodeID := uuid.New().String()
  1154  	bootstrapContents := e2e.DefaultBootstrapContents(t, nodeID, managementServer.Address)
  1155  
  1156  	// Start a test backend for the LOGICAL_DNS cluster.
  1157  	server := stubserver.StartTestService(t, nil)
  1158  	defer server.Stop()
  1159  	dnsHostName, dnsPort := hostAndPortFromAddress(t, server.Address)
  1160  
  1161  	// Configure an aggregate cluster pointing to an EDS and DNS cluster. No
  1162  	// endpoints are configured for the EDS cluster.
  1163  	const (
  1164  		edsClusterName = clusterName + "-eds"
  1165  		dnsClusterName = clusterName + "-dns"
  1166  	)
  1167  	resources := e2e.UpdateOptions{
  1168  		NodeID: nodeID,
  1169  		Clusters: []*v3clusterpb.Cluster{
  1170  			makeAggregateClusterResource(clusterName, []string{edsClusterName, dnsClusterName}),
  1171  			e2e.DefaultCluster(edsClusterName, "", e2e.SecurityLevelNone),
  1172  			makeLogicalDNSClusterResource(dnsClusterName, dnsHostName, dnsPort),
  1173  		},
  1174  		SkipValidation: true,
  1175  	}
  1176  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1177  	defer cancel()
  1178  	if err := managementServer.Update(ctx, resources); err != nil {
  1179  		t.Fatal(err)
  1180  	}
  1181  
  1182  	// Create an xDS client talking to the above management server, configured
  1183  	// with a short watch expiry timeout.
  1184  	config, err := bootstrap.NewConfigFromContents(bootstrapContents)
  1185  	if err != nil {
  1186  		t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err)
  1187  	}
  1188  	pool := xdsclient.NewPool(config)
  1189  	xdsClient, close, err := pool.NewClientForTesting(xdsclient.OptionsForTesting{
  1190  		Name:               t.Name(),
  1191  		WatchExpiryTimeout: defaultTestWatchExpiryTimeout,
  1192  	})
  1193  	if err != nil {
  1194  		t.Fatalf("Failed to create an xDS client: %v", err)
  1195  	}
  1196  	defer close()
  1197  
  1198  	// Create a manual resolver and push a service config specifying the use of
  1199  	// the cds LB policy as the top-level LB policy, and a corresponding config
  1200  	// with a single cluster.
  1201  	r := manual.NewBuilderWithScheme("whatever")
  1202  	jsonSC := fmt.Sprintf(`{
  1203  			"loadBalancingConfig":[{
  1204  				"cds_experimental":{
  1205  					"cluster": "%s"
  1206  				}
  1207  			}]
  1208  		}`, clusterName)
  1209  	scpr := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(jsonSC)
  1210  	r.InitialState(xdsclient.SetClient(resolver.State{ServiceConfig: scpr}, xdsClient))
  1211  
  1212  	// Create a ClientConn.
  1213  	cc, err := grpc.NewClient(r.Scheme()+":///test.service", grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(r))
  1214  	if err != nil {
  1215  		t.Fatalf("failed to create new client for local test server: %v", err)
  1216  	}
  1217  	defer cc.Close()
  1218  
  1219  	// Make an RPC and ensure that it gets routed to the LOGICAL_DNS cluster.
  1220  	// Even though the EDS cluster is of higher priority, since the management
  1221  	// server does not respond with an EDS resource, the cluster_resolver LB
  1222  	// policy is expected to fallback to the LOGICAL_DNS cluster once the watch
  1223  	// timeout expires.
  1224  	peer := &peer.Peer{}
  1225  	client := testgrpc.NewTestServiceClient(cc)
  1226  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(peer), grpc.WaitForReady(true)); err != nil {
  1227  		t.Fatalf("EmptyCall() failed: %v", err)
  1228  	}
  1229  	if peer.Addr.String() != server.Address {
  1230  		t.Fatalf("EmptyCall() routed to backend %q, want %q", peer.Addr, server.Address)
  1231  	}
  1232  }