google.golang.org/grpc@v1.62.1/xds/internal/xdsclient/transport/transport_backoff_test.go (about)

     1  /*
     2   *
     3   * Copyright 2022 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   */
    17  
    18  package transport_test
    19  
    20  import (
    21  	"context"
    22  	"errors"
    23  	"strings"
    24  	"testing"
    25  	"time"
    26  
    27  	"github.com/google/go-cmp/cmp"
    28  	"github.com/google/go-cmp/cmp/cmpopts"
    29  	"github.com/google/uuid"
    30  	"google.golang.org/grpc/connectivity"
    31  	"google.golang.org/grpc/internal/testutils"
    32  	"google.golang.org/grpc/internal/testutils/xds/e2e"
    33  	xdstestutils "google.golang.org/grpc/xds/internal/testutils"
    34  	"google.golang.org/grpc/xds/internal/xdsclient/transport"
    35  	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version"
    36  	"google.golang.org/protobuf/testing/protocmp"
    37  	"google.golang.org/protobuf/types/known/anypb"
    38  
    39  	v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
    40  	v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
    41  	v3httppb "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/network/http_connection_manager/v3"
    42  	v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
    43  )
    44  
    45  var strSort = func(s1, s2 string) bool { return s1 < s2 }
    46  
    47  // TestTransport_BackoffAfterStreamFailure tests the case where the management
    48  // server returns an error in the ADS streaming RPC. The test verifies the
    49  // following:
    50  // 1. Initial discovery request matches expectation.
    51  // 2. RPC error is propagated via the stream error handler.
    52  // 3. When the stream is closed, the transport backs off.
    53  // 4. The same discovery request is sent on the newly created stream.
    54  func (s) TestTransport_BackoffAfterStreamFailure(t *testing.T) {
    55  	// Channels used for verifying different events in the test.
    56  	streamCloseCh := make(chan struct{}, 1)                          // ADS stream is closed.
    57  	streamRequestCh := make(chan *v3discoverypb.DiscoveryRequest, 1) // Discovery request is received.
    58  	backoffCh := make(chan struct{}, 1)                              // Transport backoff after stream failure.
    59  	streamErrCh := make(chan error, 1)                               // Stream error seen by the transport.
    60  
    61  	// Create an xDS management server listening on a local port.
    62  	streamErr := errors.New("ADS stream error")
    63  	mgmtServer, err := e2e.StartManagementServer(e2e.ManagementServerOptions{
    64  		// Push on a channel whenever the stream is closed.
    65  		OnStreamClosed: func(int64, *v3corepb.Node) {
    66  			select {
    67  			case streamCloseCh <- struct{}{}:
    68  			default:
    69  			}
    70  		},
    71  
    72  		// Return an error everytime a request is sent on the stream. This
    73  		// should cause the transport to backoff before attempting to recreate
    74  		// the stream.
    75  		OnStreamRequest: func(id int64, req *v3discoverypb.DiscoveryRequest) error {
    76  			select {
    77  			case streamRequestCh <- req:
    78  			default:
    79  			}
    80  			return streamErr
    81  		},
    82  	})
    83  	if err != nil {
    84  		t.Fatalf("Failed to start xDS management server: %v", err)
    85  	}
    86  	defer mgmtServer.Stop()
    87  	t.Logf("Started xDS management server on %s", mgmtServer.Address)
    88  
    89  	// Override the backoff implementation to push on a channel that is read by
    90  	// the test goroutine.
    91  	transportBackoff := func(v int) time.Duration {
    92  		select {
    93  		case backoffCh <- struct{}{}:
    94  		default:
    95  		}
    96  		return 0
    97  	}
    98  
    99  	// Create a new transport. Since we are only testing backoff behavior here,
   100  	// we can pass a no-op data model layer implementation.
   101  	nodeID := uuid.New().String()
   102  	tr, err := transport.New(transport.Options{
   103  		ServerCfg:     *xdstestutils.ServerConfigForAddress(t, mgmtServer.Address),
   104  		OnRecvHandler: func(transport.ResourceUpdate) error { return nil }, // No data model layer validation.
   105  		OnErrorHandler: func(err error) {
   106  			select {
   107  			case streamErrCh <- err:
   108  			default:
   109  			}
   110  		},
   111  		OnSendHandler: func(*transport.ResourceSendInfo) {},
   112  		Backoff:       transportBackoff,
   113  		NodeProto:     &v3corepb.Node{Id: nodeID},
   114  	})
   115  	if err != nil {
   116  		t.Fatalf("Failed to create xDS transport: %v", err)
   117  	}
   118  	defer tr.Close()
   119  
   120  	// Send a discovery request through the transport.
   121  	const resourceName = "resource name"
   122  	tr.SendRequest(version.V3ListenerURL, []string{resourceName})
   123  
   124  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   125  	defer cancel()
   126  
   127  	// Verify that the initial discovery request matches expectation.
   128  	var gotReq *v3discoverypb.DiscoveryRequest
   129  	select {
   130  	case gotReq = <-streamRequestCh:
   131  	case <-ctx.Done():
   132  		t.Fatalf("Timeout waiting for discovery request on the stream")
   133  	}
   134  	wantReq := &v3discoverypb.DiscoveryRequest{
   135  		VersionInfo:   "",
   136  		Node:          &v3corepb.Node{Id: nodeID},
   137  		ResourceNames: []string{resourceName},
   138  		TypeUrl:       "type.googleapis.com/envoy.config.listener.v3.Listener",
   139  		ResponseNonce: "",
   140  	}
   141  	if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" {
   142  		t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff)
   143  	}
   144  
   145  	// Verify that the received stream error is reported to the user.
   146  	var gotErr error
   147  	select {
   148  	case gotErr = <-streamErrCh:
   149  	case <-ctx.Done():
   150  		t.Fatalf("Timeout waiting for stream error to be reported to the user")
   151  	}
   152  	if !strings.Contains(gotErr.Error(), streamErr.Error()) {
   153  		t.Fatalf("Received stream error: %v, wantErr: %v", gotErr, streamErr)
   154  	}
   155  
   156  	// Verify that the stream is closed.
   157  	select {
   158  	case <-streamCloseCh:
   159  	case <-ctx.Done():
   160  		t.Fatalf("Timeout waiting for stream to be closed after an error")
   161  	}
   162  
   163  	// Verify that the transport backs off before recreating the stream.
   164  	select {
   165  	case <-backoffCh:
   166  	case <-ctx.Done():
   167  		t.Fatalf("Timeout waiting for transport to backoff after stream failure")
   168  	}
   169  
   170  	// Verify that the same discovery request is resent on the new stream.
   171  	select {
   172  	case gotReq = <-streamRequestCh:
   173  	case <-ctx.Done():
   174  		t.Fatalf("Timeout waiting for discovery request on the stream")
   175  	}
   176  	if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" {
   177  		t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff)
   178  	}
   179  }
   180  
   181  // TestTransport_RetriesAfterBrokenStream tests the case where a stream breaks
   182  // because the server goes down. The test verifies the following:
   183  //  1. Initial discovery request matches expectation.
   184  //  2. Good response from the server leads to an ACK with appropriate version.
   185  //  3. Management server going down, leads to stream failure.
   186  //  4. Once the management server comes back up, the same resources are
   187  //     re-requested, this time with an empty nonce.
   188  func (s) TestTransport_RetriesAfterBrokenStream(t *testing.T) {
   189  	// Channels used for verifying different events in the test.
   190  	streamRequestCh := make(chan *v3discoverypb.DiscoveryRequest, 1)   // Discovery request is received.
   191  	streamResponseCh := make(chan *v3discoverypb.DiscoveryResponse, 1) // Discovery response is received.
   192  	streamErrCh := make(chan error, 1)                                 // Stream error seen by the transport.
   193  
   194  	// Create an xDS management server listening on a local port.
   195  	l, err := testutils.LocalTCPListener()
   196  	if err != nil {
   197  		t.Fatalf("Failed to create a local listener for the xDS management server: %v", err)
   198  	}
   199  	lis := testutils.NewRestartableListener(l)
   200  	mgmtServer, err := e2e.StartManagementServer(e2e.ManagementServerOptions{
   201  		Listener: lis,
   202  		// Push the received request on to a channel for the test goroutine to
   203  		// verify that it matches expectations.
   204  		OnStreamRequest: func(_ int64, req *v3discoverypb.DiscoveryRequest) error {
   205  			select {
   206  			case streamRequestCh <- req:
   207  			default:
   208  			}
   209  			return nil
   210  		},
   211  		// Push the response that the management server is about to send on to a
   212  		// channel. The test goroutine to uses this to extract the version and
   213  		// nonce, expected on subsequent requests.
   214  		OnStreamResponse: func(_ context.Context, _ int64, _ *v3discoverypb.DiscoveryRequest, resp *v3discoverypb.DiscoveryResponse) {
   215  			select {
   216  			case streamResponseCh <- resp:
   217  			default:
   218  			}
   219  		},
   220  	})
   221  	if err != nil {
   222  		t.Fatalf("Failed to start xDS management server: %v", err)
   223  	}
   224  	defer mgmtServer.Stop()
   225  	t.Logf("Started xDS management server on %s", lis.Addr().String())
   226  
   227  	// Configure the management server with appropriate resources.
   228  	apiListener := &v3listenerpb.ApiListener{
   229  		ApiListener: func() *anypb.Any {
   230  			return testutils.MarshalAny(t, &v3httppb.HttpConnectionManager{
   231  				RouteSpecifier: &v3httppb.HttpConnectionManager_Rds{
   232  					Rds: &v3httppb.Rds{
   233  						ConfigSource: &v3corepb.ConfigSource{
   234  							ConfigSourceSpecifier: &v3corepb.ConfigSource_Ads{Ads: &v3corepb.AggregatedConfigSource{}},
   235  						},
   236  						RouteConfigName: "route-configuration-name",
   237  					},
   238  				},
   239  			})
   240  		}(),
   241  	}
   242  	const resourceName1 = "resource name 1"
   243  	const resourceName2 = "resource name 2"
   244  	listenerResource1 := &v3listenerpb.Listener{
   245  		Name:        resourceName1,
   246  		ApiListener: apiListener,
   247  	}
   248  	listenerResource2 := &v3listenerpb.Listener{
   249  		Name:        resourceName2,
   250  		ApiListener: apiListener,
   251  	}
   252  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   253  	defer cancel()
   254  	nodeID := uuid.New().String()
   255  	mgmtServer.Update(ctx, e2e.UpdateOptions{
   256  		NodeID:         nodeID,
   257  		Listeners:      []*v3listenerpb.Listener{listenerResource1, listenerResource2},
   258  		SkipValidation: true,
   259  	})
   260  
   261  	// Create a new transport. Since we are only testing backoff behavior here,
   262  	// we can pass a no-op data model layer implementation.
   263  	tr, err := transport.New(transport.Options{
   264  		ServerCfg:     *xdstestutils.ServerConfigForAddress(t, mgmtServer.Address),
   265  		OnRecvHandler: func(transport.ResourceUpdate) error { return nil }, // No data model layer validation.
   266  		OnErrorHandler: func(err error) {
   267  			select {
   268  			case streamErrCh <- err:
   269  			default:
   270  			}
   271  		},
   272  		OnSendHandler: func(*transport.ResourceSendInfo) {},
   273  		Backoff:       func(int) time.Duration { return time.Duration(0) }, // No backoff.
   274  		NodeProto:     &v3corepb.Node{Id: nodeID},
   275  	})
   276  	if err != nil {
   277  		t.Fatalf("Failed to create xDS transport: %v", err)
   278  	}
   279  	defer tr.Close()
   280  
   281  	// Send a discovery request through the transport.
   282  	tr.SendRequest(version.V3ListenerURL, []string{resourceName1, resourceName2})
   283  
   284  	// Verify that the initial discovery request matches expectation.
   285  	var gotReq *v3discoverypb.DiscoveryRequest
   286  	select {
   287  	case gotReq = <-streamRequestCh:
   288  	case <-ctx.Done():
   289  		t.Fatalf("Timeout waiting for discovery request on the stream")
   290  	}
   291  	wantReq := &v3discoverypb.DiscoveryRequest{
   292  		VersionInfo:   "",
   293  		Node:          &v3corepb.Node{Id: nodeID},
   294  		ResourceNames: []string{resourceName1, resourceName2},
   295  		TypeUrl:       "type.googleapis.com/envoy.config.listener.v3.Listener",
   296  		ResponseNonce: "",
   297  	}
   298  	if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform(), cmpopts.SortSlices(strSort)); diff != "" {
   299  		t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff)
   300  	}
   301  
   302  	// Capture the version and nonce from the response.
   303  	var gotResp *v3discoverypb.DiscoveryResponse
   304  	select {
   305  	case gotResp = <-streamResponseCh:
   306  	case <-ctx.Done():
   307  		t.Fatalf("Timeout waiting for discovery response on the stream")
   308  	}
   309  	version := gotResp.GetVersionInfo()
   310  	nonce := gotResp.GetNonce()
   311  
   312  	// Verify that the ACK contains the appropriate version and nonce.
   313  	wantReq.VersionInfo = version
   314  	wantReq.ResponseNonce = nonce
   315  	select {
   316  	case gotReq = <-streamRequestCh:
   317  	case <-ctx.Done():
   318  		t.Fatalf("Timeout waiting for the discovery request ACK on the stream")
   319  	}
   320  	if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform(), cmpopts.SortSlices(strSort)); diff != "" {
   321  		t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff)
   322  	}
   323  
   324  	// Bring down the management server to simulate a broken stream.
   325  	lis.Stop()
   326  
   327  	// We don't care about the exact error here and it can vary based on which
   328  	// error gets reported first, the Recv() failure or the new stream creation
   329  	// failure. So, all we check here is whether we get an error or not.
   330  	select {
   331  	case <-streamErrCh:
   332  	case <-ctx.Done():
   333  		t.Fatalf("Timeout waiting for stream error to be reported to the user")
   334  	}
   335  
   336  	// Bring up the connection to the management server.
   337  	lis.Restart()
   338  
   339  	// Verify that the transport creates a new stream and sends out a new
   340  	// request which contains the previously acked version, but an empty nonce.
   341  	wantReq.ResponseNonce = ""
   342  	select {
   343  	case gotReq = <-streamRequestCh:
   344  	case <-ctx.Done():
   345  		t.Fatalf("Timeout waiting for the discovery request ACK on the stream")
   346  	}
   347  	if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform(), cmpopts.SortSlices(strSort)); diff != "" {
   348  		t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff)
   349  	}
   350  }
   351  
   352  // TestTransport_ResourceRequestedBeforeStreamCreation tests the case where a
   353  // resource is requested before the transport has a valid stream. Verifies that
   354  // the transport sends out the request once it has a valid stream.
   355  func (s) TestTransport_ResourceRequestedBeforeStreamCreation(t *testing.T) {
   356  	// Channels used for verifying different events in the test.
   357  	streamRequestCh := make(chan *v3discoverypb.DiscoveryRequest, 1) // Discovery request is received.
   358  
   359  	// Create an xDS management server listening on a local port.
   360  	l, err := testutils.LocalTCPListener()
   361  	if err != nil {
   362  		t.Fatalf("Failed to create a local listener for the xDS management server: %v", err)
   363  	}
   364  	lis := testutils.NewRestartableListener(l)
   365  	streamErr := errors.New("ADS stream error")
   366  
   367  	mgmtServer, err := e2e.StartManagementServer(e2e.ManagementServerOptions{
   368  		Listener: lis,
   369  
   370  		// Return an error everytime a request is sent on the stream. This
   371  		// should cause the transport to backoff before attempting to recreate
   372  		// the stream.
   373  		OnStreamRequest: func(id int64, req *v3discoverypb.DiscoveryRequest) error {
   374  			select {
   375  			case streamRequestCh <- req:
   376  			default:
   377  			}
   378  			return streamErr
   379  		},
   380  	})
   381  	if err != nil {
   382  		t.Fatalf("Failed to start xDS management server: %v", err)
   383  	}
   384  	defer mgmtServer.Stop()
   385  	t.Logf("Started xDS management server on %s", lis.Addr().String())
   386  
   387  	// Bring down the management server before creating the transport. This
   388  	// allows us to test the case where SendRequest() is called when there is no
   389  	// stream to the management server.
   390  	lis.Stop()
   391  
   392  	// Create a new transport. Since we are only testing backoff behavior here,
   393  	// we can pass a no-op data model layer implementation.
   394  	nodeID := uuid.New().String()
   395  	tr, err := transport.New(transport.Options{
   396  		ServerCfg:      *xdstestutils.ServerConfigForAddress(t, mgmtServer.Address),
   397  		OnRecvHandler:  func(transport.ResourceUpdate) error { return nil }, // No data model layer validation.
   398  		OnErrorHandler: func(error) {},                                      // No stream error handling.
   399  		OnSendHandler:  func(*transport.ResourceSendInfo) {},                // No on send handler
   400  		Backoff:        func(int) time.Duration { return time.Duration(0) }, // No backoff.
   401  		NodeProto:      &v3corepb.Node{Id: nodeID},
   402  	})
   403  	if err != nil {
   404  		t.Fatalf("Failed to create xDS transport: %v", err)
   405  	}
   406  	defer tr.Close()
   407  
   408  	// Send a discovery request through the transport.
   409  	const resourceName = "resource name"
   410  	tr.SendRequest(version.V3ListenerURL, []string{resourceName})
   411  
   412  	// Wait until the transport has attempted to connect to the management
   413  	// server and has seen the connection fail. In this case, since the
   414  	// connection is down, and the transport creates streams with WaitForReady()
   415  	// set to true, stream creation will never fail (unless the context
   416  	// expires), and therefore we cannot rely on the stream error handler.
   417  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   418  	defer cancel()
   419  	for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) {
   420  		if tr.ChannelConnectivityStateForTesting() == connectivity.TransientFailure {
   421  			break
   422  		}
   423  	}
   424  
   425  	lis.Restart()
   426  
   427  	// Verify that the initial discovery request matches expectation.
   428  	var gotReq *v3discoverypb.DiscoveryRequest
   429  	select {
   430  	case gotReq = <-streamRequestCh:
   431  	case <-ctx.Done():
   432  		t.Fatalf("Timeout waiting for discovery request on the stream")
   433  	}
   434  	wantReq := &v3discoverypb.DiscoveryRequest{
   435  		VersionInfo:   "",
   436  		Node:          &v3corepb.Node{Id: nodeID},
   437  		ResourceNames: []string{resourceName},
   438  		TypeUrl:       "type.googleapis.com/envoy.config.listener.v3.Listener",
   439  		ResponseNonce: "",
   440  	}
   441  	if diff := cmp.Diff(gotReq, wantReq, protocmp.Transform()); diff != "" {
   442  		t.Fatalf("Unexpected diff in received discovery request, diff (-got, +want):\n%s", diff)
   443  	}
   444  }