google.golang.org/grpc@v1.72.2/xds/internal/xdsclient/tests/ads_stream_backoff_test.go (about)

     1  /*
     2   *
     3   * Copyright 2024 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package xdsclient_test
    20  
    21  import (
    22  	"context"
    23  	"errors"
    24  	"fmt"
    25  	"testing"
    26  	"time"
    27  
    28  	"github.com/google/go-cmp/cmp"
    29  	"github.com/google/go-cmp/cmp/cmpopts"
    30  	"github.com/google/uuid"
    31  	"google.golang.org/grpc"
    32  	"google.golang.org/grpc/internal/testutils"
    33  	"google.golang.org/grpc/internal/testutils/xds/e2e"
    34  	"google.golang.org/grpc/internal/xds/bootstrap"
    35  	"google.golang.org/grpc/xds/internal/xdsclient"
    36  	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource"
    37  	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version"
    38  	"google.golang.org/protobuf/testing/protocmp"
    39  
    40  	v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
    41  	v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
    42  	v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
    43  )
    44  
    45  // Creates an xDS client with the given bootstrap contents and backoff function.
    46  func createXDSClientWithBackoff(t *testing.T, bootstrapContents []byte, streamBackoff func(int) time.Duration) xdsclient.XDSClient {
    47  	t.Helper()
    48  
    49  	config, err := bootstrap.NewConfigFromContents(bootstrapContents)
    50  	if err != nil {
    51  		t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err)
    52  	}
    53  	pool := xdsclient.NewPool(config)
    54  	client, close, err := pool.NewClientForTesting(xdsclient.OptionsForTesting{
    55  		Name:                      t.Name(),
    56  		StreamBackoffAfterFailure: streamBackoff,
    57  	})
    58  	if err != nil {
    59  		t.Fatalf("Failed to create xDS client: %v", err)
    60  	}
    61  	t.Cleanup(close)
    62  	return client
    63  }
    64  
    65  // Tests the case where the management server returns an error in the ADS
    66  // streaming RPC. Verifies that the ADS stream is restarted after a backoff
    67  // period, and that the previously requested resources are re-requested on the
    68  // new stream.
    69  func (s) TestADS_BackoffAfterStreamFailure(t *testing.T) {
    70  	// Channels used for verifying different events in the test.
    71  	streamCloseCh := make(chan struct{}, 1)  // ADS stream is closed.
    72  	ldsResourcesCh := make(chan []string, 1) // Listener resource names in the discovery request.
    73  	backoffCh := make(chan struct{}, 1)      // Backoff after stream failure.
    74  
    75  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
    76  	defer cancel()
    77  
    78  	// Create an xDS management server that returns RPC errors.
    79  	streamErr := errors.New("ADS stream error")
    80  	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{
    81  		OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error {
    82  			// Push the requested resource names on to a channel.
    83  			if req.GetTypeUrl() == version.V3ListenerURL {
    84  				t.Logf("Received LDS request for resources: %v", req.GetResourceNames())
    85  				select {
    86  				case ldsResourcesCh <- req.GetResourceNames():
    87  				case <-ctx.Done():
    88  				}
    89  			}
    90  			// Return an error everytime a request is sent on the stream. This
    91  			// should cause the transport to backoff before attempting to
    92  			// recreate the stream.
    93  			return streamErr
    94  		},
    95  		// Push on a channel whenever the stream is closed.
    96  		OnStreamClosed: func(int64, *v3corepb.Node) {
    97  			select {
    98  			case streamCloseCh <- struct{}{}:
    99  			case <-ctx.Done():
   100  			}
   101  		},
   102  	})
   103  
   104  	// Override the backoff implementation to push on a channel that is read by
   105  	// the test goroutine.
   106  	backoffCtx, backoffCancel := context.WithCancel(ctx)
   107  	streamBackoff := func(v int) time.Duration {
   108  		select {
   109  		case backoffCh <- struct{}{}:
   110  		case <-backoffCtx.Done():
   111  		}
   112  		return 0
   113  	}
   114  	defer backoffCancel()
   115  
   116  	// Create an xDS client with bootstrap pointing to the above server.
   117  	nodeID := uuid.New().String()
   118  	bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address)
   119  	client := createXDSClientWithBackoff(t, bc, streamBackoff)
   120  
   121  	// Register a watch for a listener resource.
   122  	const listenerName = "listener"
   123  	lw := newListenerWatcher()
   124  	ldsCancel := xdsresource.WatchListener(client, listenerName, lw)
   125  	defer ldsCancel()
   126  
   127  	// Verify that an ADS stream is created and an LDS request with the above
   128  	// resource name is sent.
   129  	if err := waitForResourceNames(ctx, t, ldsResourcesCh, []string{listenerName}); err != nil {
   130  		t.Fatal(err)
   131  	}
   132  
   133  	// Verify that the received stream error is reported to the watcher.
   134  	if err := verifyListenerError(ctx, lw.updateCh, streamErr.Error(), nodeID); err != nil {
   135  		t.Fatal(err)
   136  	}
   137  
   138  	// Verify that the stream is closed.
   139  	select {
   140  	case <-streamCloseCh:
   141  	case <-ctx.Done():
   142  		t.Fatalf("Timeout waiting for stream to be closed after an error")
   143  	}
   144  
   145  	// Verify that the ADS stream backs off before recreating the stream.
   146  	select {
   147  	case <-backoffCh:
   148  	case <-ctx.Done():
   149  		t.Fatalf("Timeout waiting for ADS stream to backoff after stream failure")
   150  	}
   151  
   152  	// Verify that the same resource name is re-requested on the new stream.
   153  	if err := waitForResourceNames(ctx, t, ldsResourcesCh, []string{listenerName}); err != nil {
   154  		t.Fatal(err)
   155  	}
   156  
   157  	// To prevent indefinite blocking during xDS client close, which is caused
   158  	// by a blocking backoff channel write, cancel the backoff context early
   159  	// given that the test is complete.
   160  	backoffCancel()
   161  
   162  }
   163  
   164  // Tests the case where a stream breaks because the server goes down. Verifies
   165  // that when the server comes back up, the same resources are re-requested, this
   166  // time with the previously acked version and an empty nonce.
   167  func (s) TestADS_RetriesAfterBrokenStream(t *testing.T) {
   168  	// Channels used for verifying different events in the test.
   169  	streamRequestCh := make(chan *v3discoverypb.DiscoveryRequest, 1)   // Discovery request is received.
   170  	streamResponseCh := make(chan *v3discoverypb.DiscoveryResponse, 1) // Discovery response is received.
   171  
   172  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   173  	defer cancel()
   174  
   175  	// Create an xDS management server listening on a local port.
   176  	l, err := testutils.LocalTCPListener()
   177  	if err != nil {
   178  		t.Fatalf("Failed to create a local listener for the xDS management server: %v", err)
   179  	}
   180  	lis := testutils.NewRestartableListener(l)
   181  	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{
   182  		Listener: lis,
   183  		// Push the received request on to a channel for the test goroutine to
   184  		// verify that it matches expectations.
   185  		OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error {
   186  			select {
   187  			case streamRequestCh <- req:
   188  			case <-ctx.Done():
   189  			}
   190  			return nil
   191  		},
   192  		// Push the response that the management server is about to send on to a
   193  		// channel. The test goroutine to uses this to extract the version and
   194  		// nonce, expected on subsequent requests.
   195  		OnStreamResponse: func(_ context.Context, _ int64, _ *v3discoverypb.DiscoveryRequest, resp *v3discoverypb.DiscoveryResponse) {
   196  			select {
   197  			case streamResponseCh <- resp:
   198  			case <-ctx.Done():
   199  			}
   200  		},
   201  	})
   202  
   203  	// Create a listener resource on the management server.
   204  	const listenerName = "listener"
   205  	const routeConfigName = "route-config"
   206  	nodeID := uuid.New().String()
   207  	resources := e2e.UpdateOptions{
   208  		NodeID:         nodeID,
   209  		Listeners:      []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerName, routeConfigName)},
   210  		SkipValidation: true,
   211  	}
   212  	if err := mgmtServer.Update(ctx, resources); err != nil {
   213  		t.Fatal(err)
   214  	}
   215  
   216  	// Override the backoff implementation to always return 0, to reduce test
   217  	// run time. Instead control when the backoff returns by blocking on a
   218  	// channel, that the test closes.
   219  	backoffCh := make(chan struct{})
   220  	streamBackoff := func(v int) time.Duration {
   221  		select {
   222  		case backoffCh <- struct{}{}:
   223  		case <-ctx.Done():
   224  		}
   225  		return 0
   226  	}
   227  
   228  	// Create an xDS client with bootstrap pointing to the above server.
   229  	bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address)
   230  	client := createXDSClientWithBackoff(t, bc, streamBackoff)
   231  
   232  	// Register a watch for a listener resource.
   233  	lw := newListenerWatcher()
   234  	ldsCancel := xdsresource.WatchListener(client, listenerName, lw)
   235  	defer ldsCancel()
   236  
   237  	// Verify that the initial discovery request matches expectation.
   238  	var gotReq *v3discoverypb.DiscoveryRequest
   239  	select {
   240  	case gotReq = <-streamRequestCh:
   241  	case <-ctx.Done():
   242  		t.Fatalf("Timeout waiting for discovery request on the stream")
   243  	}
   244  	wantReq := &v3discoverypb.DiscoveryRequest{
   245  		VersionInfo: "",
   246  		Node: &v3corepb.Node{
   247  			Id:                   nodeID,
   248  			UserAgentName:        "gRPC Go",
   249  			UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: grpc.Version},
   250  			ClientFeatures:       []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw"},
   251  		},
   252  		ResourceNames: []string{listenerName},
   253  		TypeUrl:       "type.googleapis.com/envoy.config.listener.v3.Listener",
   254  		ResponseNonce: "",
   255  	}
   256  	if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" {
   257  		t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff)
   258  	}
   259  
   260  	// Capture the version and nonce from the response.
   261  	var gotResp *v3discoverypb.DiscoveryResponse
   262  	select {
   263  	case gotResp = <-streamResponseCh:
   264  	case <-ctx.Done():
   265  		t.Fatalf("Timeout waiting for discovery response on the stream")
   266  	}
   267  	version := gotResp.GetVersionInfo()
   268  	nonce := gotResp.GetNonce()
   269  
   270  	// Verify that the ACK contains the appropriate version and nonce.
   271  	wantReq.VersionInfo = version
   272  	wantReq.ResponseNonce = nonce
   273  	select {
   274  	case gotReq = <-streamRequestCh:
   275  	case <-ctx.Done():
   276  		t.Fatalf("Timeout waiting for the discovery request ACK on the stream")
   277  	}
   278  	if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" {
   279  		t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff)
   280  	}
   281  
   282  	// Verify the update received by the watcher.
   283  	wantUpdate := listenerUpdateErrTuple{
   284  		update: xdsresource.ListenerUpdate{
   285  			RouteConfigName: routeConfigName,
   286  			HTTPFilters:     []xdsresource.HTTPFilter{{Name: "router"}},
   287  		},
   288  	}
   289  	if err := verifyListenerUpdate(ctx, lw.updateCh, wantUpdate); err != nil {
   290  		t.Fatal(err)
   291  	}
   292  
   293  	// Bring down the management server to simulate a broken stream.
   294  	lis.Stop()
   295  
   296  	// Verify that the error callback on the watcher is not invoked.
   297  	verifyNoListenerUpdate(ctx, lw.updateCh)
   298  
   299  	// Wait for backoff to kick in, and unblock the first backoff attempt.
   300  	select {
   301  	case <-backoffCh:
   302  	case <-ctx.Done():
   303  		t.Fatal("Timeout waiting for stream backoff")
   304  	}
   305  
   306  	// Bring up the management server. The test does not have prcecise control
   307  	// over when new streams to the management server will start succeeding. The
   308  	// ADS stream implementation will backoff as many times as required before
   309  	// it can successfully create a new stream. Therefore, we need to receive on
   310  	// the backoffCh as many times as required, and unblock the backoff
   311  	// implementation.
   312  	lis.Restart()
   313  	go func() {
   314  		for {
   315  			select {
   316  			case <-backoffCh:
   317  			case <-ctx.Done():
   318  				return
   319  			}
   320  		}
   321  	}()
   322  
   323  	// Verify that the transport creates a new stream and sends out a new
   324  	// request which contains the previously acked version, but an empty nonce.
   325  	wantReq.ResponseNonce = ""
   326  	select {
   327  	case gotReq = <-streamRequestCh:
   328  	case <-ctx.Done():
   329  		t.Fatalf("Timeout waiting for the discovery request ACK on the stream")
   330  	}
   331  	if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" {
   332  		t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff)
   333  	}
   334  }
   335  
   336  // Tests the case where a resource is requested before the a valid ADS stream
   337  // exists. Verifies that the a discovery request is sent out for the previously
   338  // requested resource once a valid stream is created.
   339  func (s) TestADS_ResourceRequestedBeforeStreamCreation(t *testing.T) {
   340  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   341  	defer cancel()
   342  
   343  	// Channels used for verifying different events in the test.
   344  	streamRequestCh := make(chan *v3discoverypb.DiscoveryRequest, 1) // Discovery request is received.
   345  
   346  	// Create an xDS management server listening on a local port.
   347  	l, err := testutils.LocalTCPListener()
   348  	if err != nil {
   349  		t.Fatalf("Failed to create a local listener: %v", err)
   350  	}
   351  	lis := testutils.NewRestartableListener(l)
   352  	streamErr := errors.New("ADS stream error")
   353  
   354  	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{
   355  		Listener: lis,
   356  
   357  		// Return an error everytime a request is sent on the stream. This
   358  		// should cause the transport to backoff before attempting to recreate
   359  		// the stream.
   360  		OnStreamRequest: func(id int64, req *v3discoverypb.DiscoveryRequest) error {
   361  			select {
   362  			case streamRequestCh <- req:
   363  			default:
   364  			}
   365  			return streamErr
   366  		},
   367  	})
   368  
   369  	// Bring down the management server before creating the transport. This
   370  	// allows us to test the case where SendRequest() is called when there is no
   371  	// stream to the management server.
   372  	lis.Stop()
   373  
   374  	// Override the backoff implementation to always return 0, to reduce test
   375  	// run time. Instead control when the backoff returns by blocking on a
   376  	// channel, that the test closes.
   377  	backoffCh := make(chan struct{}, 1)
   378  	unblockBackoffCh := make(chan struct{})
   379  	streamBackoff := func(v int) time.Duration {
   380  		select {
   381  		case backoffCh <- struct{}{}:
   382  		default:
   383  		}
   384  		<-unblockBackoffCh
   385  		return 0
   386  	}
   387  
   388  	// Create an xDS client with bootstrap pointing to the above server.
   389  	nodeID := uuid.New().String()
   390  	bc := e2e.DefaultBootstrapContents(t, nodeID, mgmtServer.Address)
   391  	client := createXDSClientWithBackoff(t, bc, streamBackoff)
   392  
   393  	// Register a watch for a listener resource.
   394  	const listenerName = "listener"
   395  	lw := newListenerWatcher()
   396  	ldsCancel := xdsresource.WatchListener(client, listenerName, lw)
   397  	defer ldsCancel()
   398  
   399  	// The above watch results in an attempt to create a new stream, which will
   400  	// fail, and will result in backoff. Wait for backoff to kick in.
   401  	select {
   402  	case <-backoffCh:
   403  	case <-ctx.Done():
   404  		t.Fatal("Timeout waiting for stream backoff")
   405  	}
   406  
   407  	// Bring up the connection to the management server, and unblock the backoff
   408  	// implementation.
   409  	lis.Restart()
   410  	close(unblockBackoffCh)
   411  
   412  	// Verify that the initial discovery request matches expectation.
   413  	var gotReq *v3discoverypb.DiscoveryRequest
   414  	select {
   415  	case gotReq = <-streamRequestCh:
   416  	case <-ctx.Done():
   417  		t.Fatalf("Timeout waiting for discovery request on the stream")
   418  	}
   419  	wantReq := &v3discoverypb.DiscoveryRequest{
   420  		VersionInfo: "",
   421  		Node: &v3corepb.Node{
   422  			Id:                   nodeID,
   423  			UserAgentName:        "gRPC Go",
   424  			UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: grpc.Version},
   425  			ClientFeatures:       []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw"},
   426  		},
   427  		ResourceNames: []string{listenerName},
   428  		TypeUrl:       "type.googleapis.com/envoy.config.listener.v3.Listener",
   429  		ResponseNonce: "",
   430  	}
   431  	if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" {
   432  		t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff)
   433  	}
   434  }
   435  
   436  // waitForResourceNames waits for the wantNames to be received on namesCh.
   437  // Returns a non-nil error if the context expires before that.
   438  func waitForResourceNames(ctx context.Context, t *testing.T, namesCh chan []string, wantNames []string) error {
   439  	t.Helper()
   440  
   441  	var lastRequestedNames []string
   442  	for ; ; <-time.After(defaultTestShortTimeout) {
   443  		select {
   444  		case <-ctx.Done():
   445  			return fmt.Errorf("timeout waiting for resources %v to be requested from the management server. Last requested resources: %v", wantNames, lastRequestedNames)
   446  		case gotNames := <-namesCh:
   447  			if cmp.Equal(gotNames, wantNames, cmpopts.EquateEmpty(), cmpopts.SortSlices(func(s1, s2 string) bool { return s1 < s2 })) {
   448  				return nil
   449  			}
   450  			lastRequestedNames = gotNames
   451  		}
   452  	}
   453  }