google.golang.org/grpc@v1.72.2/test/xds/xds_client_ignore_resource_deletion_test.go (about)

     1  /*
     2   *
     3   * Copyright 2023 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package xds_test
    20  
    21  import (
    22  	"context"
    23  	"encoding/json"
    24  	"fmt"
    25  	"net"
    26  	"strings"
    27  	"sync"
    28  	"testing"
    29  	"time"
    30  
    31  	"github.com/google/uuid"
    32  	"google.golang.org/grpc"
    33  	"google.golang.org/grpc/codes"
    34  	"google.golang.org/grpc/connectivity"
    35  	"google.golang.org/grpc/credentials/insecure"
    36  	"google.golang.org/grpc/internal"
    37  	"google.golang.org/grpc/internal/stubserver"
    38  	"google.golang.org/grpc/internal/testutils"
    39  	"google.golang.org/grpc/internal/testutils/xds/e2e"
    40  	"google.golang.org/grpc/internal/xds/bootstrap"
    41  	"google.golang.org/grpc/resolver"
    42  	"google.golang.org/grpc/status"
    43  	"google.golang.org/grpc/xds"
    44  
    45  	clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
    46  	endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
    47  	listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
    48  	routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3"
    49  	testgrpc "google.golang.org/grpc/interop/grpc_testing"
    50  	testpb "google.golang.org/grpc/interop/grpc_testing"
    51  )
    52  
    53  const (
    54  	serviceName = "my-service-xds"
    55  	rdsName     = "route-" + serviceName
    56  	cdsName1    = "cluster1-" + serviceName
    57  	cdsName2    = "cluster2-" + serviceName
    58  	edsName1    = "eds1-" + serviceName
    59  	edsName2    = "eds2-" + serviceName
    60  )
    61  
    62  var (
    63  	// This route configuration resource contains two routes:
    64  	// - a route for the EmptyCall rpc, to be sent to cluster1
    65  	// - a route for the UnaryCall rpc, to be sent to cluster2
    66  	defaultRouteConfigWithTwoRoutes = &routepb.RouteConfiguration{
    67  		Name: rdsName,
    68  		VirtualHosts: []*routepb.VirtualHost{{
    69  			Domains: []string{serviceName},
    70  			Routes: []*routepb.Route{
    71  				{
    72  					Match: &routepb.RouteMatch{PathSpecifier: &routepb.RouteMatch_Prefix{Prefix: "/grpc.testing.TestService/EmptyCall"}},
    73  					Action: &routepb.Route_Route{Route: &routepb.RouteAction{
    74  						ClusterSpecifier: &routepb.RouteAction_Cluster{Cluster: cdsName1},
    75  					}},
    76  				},
    77  				{
    78  					Match: &routepb.RouteMatch{PathSpecifier: &routepb.RouteMatch_Prefix{Prefix: "/grpc.testing.TestService/UnaryCall"}},
    79  					Action: &routepb.Route_Route{Route: &routepb.RouteAction{
    80  						ClusterSpecifier: &routepb.RouteAction_Cluster{Cluster: cdsName2},
    81  					}},
    82  				},
    83  			},
    84  		}},
    85  	}
    86  )
    87  
    88  // This test runs subtest each for a Listener resource and a Cluster resource deletion
    89  // in the response from the server for the following cases:
    90  //   - testResourceDeletionIgnored: When ignore_resource_deletion is set, the
    91  //     xDSClient should not delete the resource.
    92  //   - testResourceDeletionNotIgnored: When ignore_resource_deletion is unset,
    93  //     the xDSClient should delete the resource.
    94  //
    95  // Resource deletion is only applicable to Listener and Cluster resources.
    96  func (s) TestIgnoreResourceDeletionOnClient(t *testing.T) {
    97  	server1 := stubserver.StartTestService(t, nil)
    98  	t.Cleanup(server1.Stop)
    99  
   100  	server2 := stubserver.StartTestService(t, nil)
   101  	t.Cleanup(server2.Stop)
   102  
   103  	initialResourceOnServer := func(nodeID string) e2e.UpdateOptions {
   104  		return e2e.UpdateOptions{
   105  			NodeID:    nodeID,
   106  			Listeners: []*listenerpb.Listener{e2e.DefaultClientListener(serviceName, rdsName)},
   107  			Routes:    []*routepb.RouteConfiguration{defaultRouteConfigWithTwoRoutes},
   108  			Clusters: []*clusterpb.Cluster{
   109  				e2e.DefaultCluster(cdsName1, edsName1, e2e.SecurityLevelNone),
   110  				e2e.DefaultCluster(cdsName2, edsName2, e2e.SecurityLevelNone),
   111  			},
   112  			Endpoints: []*endpointpb.ClusterLoadAssignment{
   113  				e2e.DefaultEndpoint(edsName1, "localhost", []uint32{testutils.ParsePort(t, server1.Address)}),
   114  				e2e.DefaultEndpoint(edsName2, "localhost", []uint32{testutils.ParsePort(t, server2.Address)}),
   115  			},
   116  			SkipValidation: true,
   117  		}
   118  	}
   119  
   120  	tests := []struct {
   121  		name           string
   122  		updateResource func(r *e2e.UpdateOptions)
   123  	}{
   124  		{
   125  			name: "listener",
   126  			updateResource: func(r *e2e.UpdateOptions) {
   127  				r.Listeners = nil
   128  			},
   129  		},
   130  		{
   131  			name: "cluster",
   132  			updateResource: func(r *e2e.UpdateOptions) {
   133  				r.Clusters = nil
   134  			},
   135  		},
   136  	}
   137  	for _, test := range tests {
   138  		t.Run(fmt.Sprintf("%s resource deletion ignored", test.name), func(t *testing.T) {
   139  			testResourceDeletionIgnored(t, initialResourceOnServer, test.updateResource)
   140  		})
   141  		t.Run(fmt.Sprintf("%s resource deletion not ignored", test.name), func(t *testing.T) {
   142  			testResourceDeletionNotIgnored(t, initialResourceOnServer, test.updateResource)
   143  		})
   144  	}
   145  }
   146  
   147  // This subtest tests the scenario where the bootstrap config has "ignore_resource_deletion"
   148  // set in "server_features" field. This subtest verifies that the resource was
   149  // not deleted by the xDSClient when a resource is missing the xDS response and
   150  // RPCs continue to succeed.
   151  func testResourceDeletionIgnored(t *testing.T, initialResource func(string) e2e.UpdateOptions, updateResource func(r *e2e.UpdateOptions)) {
   152  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   153  	t.Cleanup(cancel)
   154  	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
   155  	nodeID := uuid.New().String()
   156  	bs := generateBootstrapContents(t, mgmtServer.Address, true, nodeID)
   157  	xdsR := xdsResolverBuilder(t, bs)
   158  	resources := initialResource(nodeID)
   159  
   160  	// Update the management server with initial resources setup.
   161  	if err := mgmtServer.Update(ctx, resources); err != nil {
   162  		t.Fatal(err)
   163  	}
   164  
   165  	cc, err := grpc.NewClient(fmt.Sprintf("xds:///%s", serviceName), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(xdsR))
   166  	if err != nil {
   167  		t.Fatalf("Failed to dial local test server: %v.", err)
   168  	}
   169  	t.Cleanup(func() { cc.Close() })
   170  
   171  	if err := verifyRPCtoAllEndpoints(cc); err != nil {
   172  		t.Fatal(err)
   173  	}
   174  
   175  	// Mutate resource and update on the server.
   176  	updateResource(&resources)
   177  	if err := mgmtServer.Update(ctx, resources); err != nil {
   178  		t.Fatal(err)
   179  	}
   180  
   181  	// Make an RPC every 50ms for the next 500ms. This is to ensure that the
   182  	// updated resource is received from the management server and is processed by
   183  	// gRPC. Since resource deletions are ignored by the xDS client, we expect RPCs
   184  	// to all endpoints to keep succeeding.
   185  	timer := time.NewTimer(500 * time.Millisecond)
   186  	ticker := time.NewTicker(50 * time.Millisecond)
   187  	t.Cleanup(ticker.Stop)
   188  	for {
   189  		if err := verifyRPCtoAllEndpoints(cc); err != nil {
   190  			t.Fatal(err)
   191  		}
   192  		select {
   193  		case <-ctx.Done():
   194  			return
   195  		case <-timer.C:
   196  			return
   197  		case <-ticker.C:
   198  		}
   199  	}
   200  }
   201  
   202  // This subtest tests the scenario where the bootstrap config has "ignore_resource_deletion"
   203  // not set in "server_features" field. This subtest verifies that the resource was
   204  // deleted by the xDSClient when a resource is missing the xDS response and subsequent
   205  // RPCs fail.
   206  func testResourceDeletionNotIgnored(t *testing.T, initialResource func(string) e2e.UpdateOptions, updateResource func(r *e2e.UpdateOptions)) {
   207  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   208  	t.Cleanup(cancel)
   209  	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
   210  	nodeID := uuid.New().String()
   211  	bs := generateBootstrapContents(t, mgmtServer.Address, false, nodeID)
   212  	xdsR := xdsResolverBuilder(t, bs)
   213  	resources := initialResource(nodeID)
   214  
   215  	// Update the management server with initial resources setup.
   216  	if err := mgmtServer.Update(ctx, resources); err != nil {
   217  		t.Fatal(err)
   218  	}
   219  
   220  	cc, err := grpc.NewClient(fmt.Sprintf("xds:///%s", serviceName), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(xdsR))
   221  	if err != nil {
   222  		t.Fatalf("failed to dial local test server: %v", err)
   223  	}
   224  	t.Cleanup(func() { cc.Close() })
   225  
   226  	if err := verifyRPCtoAllEndpoints(cc); err != nil {
   227  		t.Fatal(err)
   228  	}
   229  
   230  	// Mutate resource and update on the server.
   231  	updateResource(&resources)
   232  	if err := mgmtServer.Update(ctx, resources); err != nil {
   233  		t.Fatal(err)
   234  	}
   235  
   236  	// Spin up go routines to verify RPCs fail after the update. The xDS node ID
   237  	// needs to be part of the error seen by the RPC caller.
   238  	client := testgrpc.NewTestServiceClient(cc)
   239  	wg := sync.WaitGroup{}
   240  	wg.Add(2)
   241  	go func() {
   242  		defer wg.Done()
   243  		for ; ctx.Err() == nil; <-time.After(10 * time.Millisecond) {
   244  			_, err := client.EmptyCall(ctx, &testpb.Empty{})
   245  			if err == nil {
   246  				continue
   247  			}
   248  			if status.Code(err) == codes.Unavailable && strings.Contains(err.Error(), nodeID) {
   249  				return
   250  			}
   251  		}
   252  	}()
   253  	go func() {
   254  		defer wg.Done()
   255  		for ; ctx.Err() == nil; <-time.After(10 * time.Millisecond) {
   256  			_, err := client.UnaryCall(ctx, &testpb.SimpleRequest{})
   257  			if err == nil {
   258  				continue
   259  			}
   260  			if status.Code(err) == codes.Unavailable && strings.Contains(err.Error(), nodeID) {
   261  				return
   262  			}
   263  		}
   264  	}()
   265  
   266  	wg.Wait()
   267  	if ctx.Err() != nil {
   268  		t.Fatal("Context expired before RPCs failed.")
   269  	}
   270  }
   271  
   272  // This helper generates a custom bootstrap config for the test.
   273  func generateBootstrapContents(t *testing.T, serverURI string, ignoreResourceDeletion bool, nodeID string) []byte {
   274  	t.Helper()
   275  	var serverCfgs json.RawMessage
   276  	if ignoreResourceDeletion {
   277  		serverCfgs = []byte(fmt.Sprintf(`[{
   278  			"server_uri": %q,
   279  			"channel_creds": [{"type": "insecure"}],
   280  			"server_features": ["ignore_resource_deletion"]
   281  		}]`, serverURI))
   282  	} else {
   283  		serverCfgs = []byte(fmt.Sprintf(`[{
   284  			"server_uri": %q,
   285  			"channel_creds": [{"type": "insecure"}]
   286  		}]`, serverURI))
   287  
   288  	}
   289  	bootstrapContents, err := bootstrap.NewContentsForTesting(bootstrap.ConfigOptionsForTesting{
   290  		Servers:                            serverCfgs,
   291  		Node:                               fmt.Appendf(nil, `{"id": "%s"}`, nodeID),
   292  		ServerListenerResourceNameTemplate: e2e.ServerListenerResourceNameTemplate,
   293  	})
   294  	if err != nil {
   295  		t.Fatal(err)
   296  	}
   297  	return bootstrapContents
   298  }
   299  
   300  // This helper creates an XDS resolver Builder from the bootstrap config passed
   301  // as parameter.
   302  func xdsResolverBuilder(t *testing.T, bs []byte) resolver.Builder {
   303  	t.Helper()
   304  	if internal.NewXDSResolverWithConfigForTesting == nil {
   305  		t.Fatalf("internal.NewXDSResolverWithConfigForTesting is nil")
   306  	}
   307  	xdsR, err := internal.NewXDSResolverWithConfigForTesting.(func([]byte) (resolver.Builder, error))(bs)
   308  	if err != nil {
   309  		t.Fatalf("Creating xDS resolver for testing failed for config %q: %v", string(bs), err)
   310  	}
   311  	return xdsR
   312  }
   313  
   314  // This helper creates an xDS-enabled gRPC server using the listener and the
   315  // bootstrap config passed. It then registers the test service on the newly
   316  // created gRPC server and starts serving.
   317  func setupGRPCServerWithModeChangeChannelAndServe(t *testing.T, bootstrapContents []byte, lis net.Listener) chan connectivity.ServingMode {
   318  	t.Helper()
   319  	updateCh := make(chan connectivity.ServingMode, 1)
   320  
   321  	// Create a server option to get notified about serving mode changes.
   322  	modeChangeOpt := xds.ServingModeCallback(func(addr net.Addr, args xds.ServingModeChangeArgs) {
   323  		t.Logf("Serving mode for listener %q changed to %q, err: %v", addr.String(), args.Mode, args.Err)
   324  		updateCh <- args.Mode
   325  	})
   326  	stub := &stubserver.StubServer{
   327  		Listener: lis,
   328  		EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) {
   329  			return &testpb.Empty{}, nil
   330  		},
   331  		UnaryCallF: func(ctx context.Context, in *testpb.SimpleRequest) (*testpb.SimpleResponse, error) {
   332  			return &testpb.SimpleResponse{}, nil
   333  		},
   334  	}
   335  	server, err := xds.NewGRPCServer(grpc.Creds(insecure.NewCredentials()), modeChangeOpt, xds.BootstrapContentsForTesting(bootstrapContents))
   336  	if err != nil {
   337  		t.Fatalf("Failed to create an xDS enabled gRPC server: %v", err)
   338  	}
   339  	stub.S = server
   340  	t.Cleanup(stub.S.Stop)
   341  
   342  	stubserver.StartTestService(t, stub)
   343  
   344  	return updateCh
   345  }
   346  
   347  // This helper creates a new TCP listener. This helper also uses this listener to
   348  // create a resource update with a listener resource. This helper returns the
   349  // resource update and the TCP listener.
   350  func resourceWithListenerForGRPCServer(t *testing.T, nodeID string) (e2e.UpdateOptions, net.Listener) {
   351  	t.Helper()
   352  	lis, err := testutils.LocalTCPListener()
   353  	if err != nil {
   354  		t.Fatalf("testutils.LocalTCPListener() failed: %v", err)
   355  	}
   356  	t.Cleanup(func() { lis.Close() })
   357  	host, port, err := hostPortFromListener(lis)
   358  	if err != nil {
   359  		t.Fatalf("Failed to retrieve host and port of listener at %q: %v", lis.Addr(), err)
   360  	}
   361  	listener := e2e.DefaultServerListener(host, port, e2e.SecurityLevelNone, "routeName")
   362  	resources := e2e.UpdateOptions{
   363  		NodeID:    nodeID,
   364  		Listeners: []*listenerpb.Listener{listener},
   365  	}
   366  	return resources, lis
   367  }
   368  
   369  // This test creates a gRPC server which provides server-side xDS functionality
   370  // by talking to a custom management server. This tests the scenario where bootstrap
   371  // config with "server_features" includes "ignore_resource_deletion". In which
   372  // case, when the listener resource is deleted on the management server, the gRPC
   373  // server should continue to serve RPCs.
   374  func (s) TestListenerResourceDeletionOnServerIgnored(t *testing.T) {
   375  	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
   376  	nodeID := uuid.New().String()
   377  	bs := generateBootstrapContents(t, mgmtServer.Address, true, nodeID)
   378  	xdsR := xdsResolverBuilder(t, bs)
   379  	resources, lis := resourceWithListenerForGRPCServer(t, nodeID)
   380  	modeChangeCh := setupGRPCServerWithModeChangeChannelAndServe(t, bs, lis)
   381  
   382  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   383  	defer cancel()
   384  	if err := mgmtServer.Update(ctx, resources); err != nil {
   385  		t.Fatal(err)
   386  	}
   387  
   388  	// Wait for the server to update to ServingModeServing mode.
   389  	select {
   390  	case <-ctx.Done():
   391  		t.Fatal("Test timed out waiting for a server to change to ServingModeServing.")
   392  	case mode := <-modeChangeCh:
   393  		if mode != connectivity.ServingModeServing {
   394  			t.Fatalf("Server switched to mode %v, want %v", mode, connectivity.ServingModeServing)
   395  		}
   396  	}
   397  
   398  	// Create a ClientConn and make a successful RPCs.
   399  	cc, err := grpc.NewClient(lis.Addr().String(), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(xdsR))
   400  	if err != nil {
   401  		t.Fatalf("failed to dial local test server: %v", err)
   402  	}
   403  	defer cc.Close()
   404  
   405  	if err := verifyRPCtoAllEndpoints(cc); err != nil {
   406  		t.Fatal(err)
   407  	}
   408  
   409  	// Update without a listener resource.
   410  	if err := mgmtServer.Update(ctx, e2e.UpdateOptions{
   411  		NodeID:    nodeID,
   412  		Listeners: []*listenerpb.Listener{},
   413  	}); err != nil {
   414  		t.Fatal(err)
   415  	}
   416  
   417  	// Perform RPCs every 100 ms for 1s and verify that the serving mode does not
   418  	// change on gRPC server.
   419  	timer := time.NewTimer(500 * time.Millisecond)
   420  	ticker := time.NewTicker(50 * time.Millisecond)
   421  	t.Cleanup(ticker.Stop)
   422  	for {
   423  		if err := verifyRPCtoAllEndpoints(cc); err != nil {
   424  			t.Fatal(err)
   425  		}
   426  		select {
   427  		case <-timer.C:
   428  			return
   429  		case mode := <-modeChangeCh:
   430  			t.Fatalf("Server switched to mode: %v when no switch was expected", mode)
   431  		case <-ticker.C:
   432  		}
   433  	}
   434  }
   435  
   436  // This test creates a gRPC server which provides server-side xDS functionality
   437  // by talking to a custom management server. This tests the scenario where bootstrap
   438  // config with "server_features" does not include "ignore_resource_deletion". In
   439  // which case, when the listener resource is deleted on the management server, the
   440  // gRPC server should stop serving RPCs and switch mode to ServingModeNotServing.
   441  func (s) TestListenerResourceDeletionOnServerNotIgnored(t *testing.T) {
   442  	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{})
   443  	nodeID := uuid.New().String()
   444  	bs := generateBootstrapContents(t, mgmtServer.Address, false, nodeID)
   445  	xdsR := xdsResolverBuilder(t, bs)
   446  	resources, lis := resourceWithListenerForGRPCServer(t, nodeID)
   447  	updateCh := setupGRPCServerWithModeChangeChannelAndServe(t, bs, lis)
   448  
   449  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   450  	defer cancel()
   451  	if err := mgmtServer.Update(ctx, resources); err != nil {
   452  		t.Fatal(err)
   453  	}
   454  
   455  	// Wait for the listener to move to "serving" mode.
   456  	select {
   457  	case <-ctx.Done():
   458  		t.Fatal("Test timed out waiting for a mode change update.")
   459  	case mode := <-updateCh:
   460  		if mode != connectivity.ServingModeServing {
   461  			t.Fatalf("Listener received new mode %v, want %v", mode, connectivity.ServingModeServing)
   462  		}
   463  	}
   464  
   465  	// Create a ClientConn and make a successful RPCs.
   466  	cc, err := grpc.NewClient(lis.Addr().String(), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(xdsR))
   467  	if err != nil {
   468  		t.Fatalf("failed to dial local test server: %v", err)
   469  	}
   470  	defer cc.Close()
   471  	if err := verifyRPCtoAllEndpoints(cc); err != nil {
   472  		t.Fatal(err)
   473  	}
   474  
   475  	if err := mgmtServer.Update(ctx, e2e.UpdateOptions{
   476  		NodeID:    nodeID,
   477  		Listeners: []*listenerpb.Listener{}, // empty listener resource
   478  	}); err != nil {
   479  		t.Fatal(err)
   480  	}
   481  
   482  	select {
   483  	case <-ctx.Done():
   484  		t.Fatalf("timed out waiting for a mode change update: %v", err)
   485  	case mode := <-updateCh:
   486  		if mode != connectivity.ServingModeNotServing {
   487  			t.Fatalf("listener received new mode %v, want %v", mode, connectivity.ServingModeNotServing)
   488  		}
   489  	}
   490  }
   491  
   492  // This helper makes both UnaryCall and EmptyCall RPCs using the ClientConn that
   493  // is passed to this function. This helper panics for any failed RPCs.
   494  func verifyRPCtoAllEndpoints(cc grpc.ClientConnInterface) error {
   495  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   496  	defer cancel()
   497  	client := testgrpc.NewTestServiceClient(cc)
   498  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
   499  		return fmt.Errorf("rpc EmptyCall() failed: %v", err)
   500  	}
   501  	if _, err := client.UnaryCall(ctx, &testpb.SimpleRequest{}); err != nil {
   502  		return fmt.Errorf("rpc UnaryCall() failed: %v", err)
   503  	}
   504  	return nil
   505  }