/*
 *
 * Copyright 2024 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package lrsclient_test

import (
	"context"
	"net"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"
	"github.com/google/uuid"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/internal/grpctest"
	"google.golang.org/grpc/status"
	"google.golang.org/grpc/xds/internal/clients"
	"google.golang.org/grpc/xds/internal/clients/grpctransport"
	"google.golang.org/grpc/xds/internal/clients/internal/testutils"
	"google.golang.org/grpc/xds/internal/clients/internal/testutils/e2e"
	"google.golang.org/grpc/xds/internal/clients/internal/testutils/fakeserver"
	"google.golang.org/grpc/xds/internal/clients/lrsclient"
	lrsclientinternal "google.golang.org/grpc/xds/internal/clients/lrsclient/internal"
	"google.golang.org/protobuf/testing/protocmp"
	"google.golang.org/protobuf/types/known/durationpb"

	v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
	v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
	v3lrspb "github.com/envoyproxy/go-control-plane/envoy/service/load_stats/v3"
)

// s embeds grpctest.Tester so that all (s) test methods in this file are run
// through the gRPC test framework via RunSubTests below.
type s struct {
	grpctest.Tester
}

func Test(t *testing.T) {
	grpctest.RunSubTests(t, s{})
}

const (
	// Arbitrary server-load metric keys used when reporting per-locality loads.
	testKey1 = "test-key1"
	testKey2 = "test-key2"

	defaultTestWatchExpiryTimeout = 100 * time.Millisecond
	defaultTestTimeout            = 5 * time.Second
	defaultTestShortTimeout       = 10 * time.Millisecond // For events expected to *not* happen.
)

var (
	testLocality1 = clients.Locality{Region: "test-region1"}
	testLocality2 = clients.Locality{Region: "test-region2"}

	// toleranceCmpOpt allows a small absolute error when comparing
	// floating-point metric values in load reports.
	toleranceCmpOpt = cmpopts.EquateApprox(0, 1e-5)

	// ignoreOrderCmpOpt compares the upstream_locality_stats field of
	// ClusterStats without regard to element order, since the client does not
	// guarantee a reporting order across localities.
	ignoreOrderCmpOpt = protocmp.FilterField(&v3endpointpb.ClusterStats{}, "upstream_locality_stats",
		cmpopts.SortSlices(func(a, b protocmp.Message) bool {
			return a.String() < b.String()
		}),
	)
)

// wrappedListener wraps a net.Listener and signals on newConnChan every time
// a new connection is accepted, letting tests observe connection attempts.
type wrappedListener struct {
	net.Listener
	newConnChan *testutils.Channel // Connection attempts are pushed here.
}

func (wl *wrappedListener) Accept() (net.Conn, error) {
	c, err := wl.Listener.Accept()
	if err != nil {
		return nil, err
	}
	wl.newConnChan.Send(struct{}{})
	return c, err
}

// Tests a load reporting scenario where the LRS client is reporting loads to
// multiple servers. Verifies the following:
//   - calling the load reporting API with different server configuration
//     results in connections being created to those corresponding servers
//   - the same load.Store is not returned when the load reporting API called
//     with different server configurations
//   - canceling the load reporting from the client results in the LRS stream
//     being canceled on the server
func (s) TestReportLoad_ConnectionCreation(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Create two management servers that also serve LRS.
	l, err := net.Listen("tcp", "localhost:0")
	if err != nil {
		t.Fatalf("net.Listen() failed: %v", err)
	}
	newConnChan1 := testutils.NewChannelWithSize(1)
	lis1 := &wrappedListener{
		Listener:    l,
		newConnChan: newConnChan1,
	}
	mgmtServer1 := e2e.StartManagementServer(t, e2e.ManagementServerOptions{
		Listener:                    lis1,
		SupportLoadReportingService: true,
	})
	l, err = net.Listen("tcp", "localhost:0")
	if err != nil {
		t.Fatalf("net.Listen() failed: %v", err)
	}
	newConnChan2 := testutils.NewChannelWithSize(1)
	lis2 := &wrappedListener{
		Listener:    l,
		newConnChan: newConnChan2,
	}
	mgmtServer2 := e2e.StartManagementServer(t, e2e.ManagementServerOptions{
		Listener:                    lis2,
		SupportLoadReportingService: true,
	})

	// Create an LRS client with a configuration that contains both of
	// the above two servers. The authority name is immaterial here since load
	// reporting is per-server and not per-authority.
	nodeID := uuid.New().String()

	configs := map[string]grpctransport.Config{"insecure": {Credentials: insecure.NewBundle()}}
	config := lrsclient.Config{
		Node:             clients.Node{ID: nodeID, UserAgentName: "user-agent", UserAgentVersion: "0.0.0.0"},
		TransportBuilder: grpctransport.NewBuilder(configs),
	}
	client, err := lrsclient.New(config)
	if err != nil {
		t.Fatalf("lrsclient.New() failed: %v", err)
	}

	serverIdentifier1 := clients.ServerIdentifier{ServerURI: mgmtServer1.Address, Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}}
	loadStore1, err := client.ReportLoad(serverIdentifier1)
	if err != nil {
		t.Fatalf("client.ReportLoad() failed: %v", err)
	}
	// The deliberately-short context makes the deferred Stop a best-effort
	// final flush at test teardown rather than a blocking wait.
	ssCtx, ssCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
	defer ssCancel()
	defer loadStore1.Stop(ssCtx)

	// Call the load reporting API to report load to the first management
	// server, and ensure that a connection to the server is created.
	if _, err := newConnChan1.Receive(ctx); err != nil {
		t.Fatal("Timeout when waiting for a connection to the first management server, after starting load reporting")
	}
	if _, err := mgmtServer1.LRSServer.LRSStreamOpenChan.Receive(ctx); err != nil {
		t.Fatal("Timeout when waiting for LRS stream to be created")
	}

	// Call the load reporting API to report load to the second management
	// server, and ensure that a connection to the server is created.
	serverIdentifier2 := clients.ServerIdentifier{ServerURI: mgmtServer2.Address, Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}}
	loadStore2, err := client.ReportLoad(serverIdentifier2)
	if err != nil {
		t.Fatalf("client.ReportLoad() failed: %v", err)
	}
	if _, err := newConnChan2.Receive(ctx); err != nil {
		t.Fatal("Timeout when waiting for a connection to the second management server, after starting load reporting")
	}
	if _, err := mgmtServer2.LRSServer.LRSStreamOpenChan.Receive(ctx); err != nil {
		t.Fatal("Timeout when waiting for LRS stream to be created")
	}

	if loadStore1 == loadStore2 {
		t.Fatalf("Got same store for different servers, want different")
	}

	// Push some loads on the received store.
	loadStore2.ReporterForCluster("cluster", "eds").CallDropped("test")

	// Ensure the initial load reporting request is received at the server.
	lrsServer := mgmtServer2.LRSServer
	req, err := lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatalf("Timeout when waiting for initial LRS request: %v", err)
	}
	gotInitialReq := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest)
	nodeProto := &v3corepb.Node{
		Id:                   nodeID,
		UserAgentName:        "user-agent",
		UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: "0.0.0.0"},
		ClientFeatures:       []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw", "envoy.lrs.supports_send_all_clusters"},
	}
	wantInitialReq := &v3lrspb.LoadStatsRequest{Node: nodeProto}
	if diff := cmp.Diff(gotInitialReq, wantInitialReq, protocmp.Transform()); diff != "" {
		t.Fatalf("Unexpected diff in initial LRS request (-got, +want):\n%s", diff)
	}

	// Send a response from the server with a small deadline.
	lrsServer.LRSResponseChan <- &fakeserver.Response{
		Resp: &v3lrspb.LoadStatsResponse{
			SendAllClusters:       true,
			LoadReportingInterval: &durationpb.Duration{Nanos: 50000000}, // 50ms
		},
	}

	// Ensure that loads are seen on the server.
	req, err = lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatalf("Timeout when waiting for LRS request with loads: %v", err)
	}
	gotLoad := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest).ClusterStats
	if l := len(gotLoad); l != 1 {
		t.Fatalf("Received load for %d clusters, want 1", l)
	}

	// This field is set by the client to indicate the actual time elapsed since
	// the last report was sent. We cannot deterministically compare this, and
	// we cannot use the cmpopts.IgnoreFields() option on proto structs, since
	// we already use the protocmp.Transform() which marshals the struct into
	// another message. Hence setting this field to nil is the best option here.
	gotLoad[0].LoadReportInterval = nil
	wantLoad := &v3endpointpb.ClusterStats{
		ClusterName:          "cluster",
		ClusterServiceName:   "eds",
		TotalDroppedRequests: 1,
		DroppedRequests:      []*v3endpointpb.ClusterStats_DroppedRequests{{Category: "test", DroppedCount: 1}},
	}
	// NOTE(review): the arguments here are (want, got), so the diff produced
	// is actually (-want, +got); the label in the message below is reversed.
	if diff := cmp.Diff(wantLoad, gotLoad[0], protocmp.Transform(), toleranceCmpOpt, ignoreOrderCmpOpt); diff != "" {
		t.Fatalf("Unexpected diff in LRS request (-got, +want):\n%s", diff)
	}

	// Stop this load reporting stream, server should see error canceled.
	ssCtx, ssCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout)
	defer ssCancel()
	loadStore2.Stop(ssCtx)

	// Server should receive a stream canceled error. There may be additional
	// load reports from the client in the channel.
	for {
		if ctx.Err() != nil {
			t.Fatal("Timeout when waiting for the LRS stream to be canceled on the server")
		}
		u, err := lrsServer.LRSRequestChan.Receive(ctx)
		if err != nil {
			continue
		}
		// Ignore load reports sent before the stream was cancelled.
		if u.(*fakeserver.Request).Err == nil {
			continue
		}
		if status.Code(u.(*fakeserver.Request).Err) != codes.Canceled {
			t.Fatalf("Unexpected LRS request: %v, want error canceled", u)
		}
		break
	}
}

// Tests a load reporting scenario where the load reporting API is called
// multiple times for the same server. The test verifies the following:
//   - calling the load reporting API the second time for the same server
//     configuration does not create a new LRS stream
//   - the LRS stream is closed *only* after all the API calls invoke their
//     cancel functions
//   - creating new streams after the previous one was closed works
func (s) TestReportLoad_StreamCreation(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Create a management server that serves LRS.
	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{SupportLoadReportingService: true})

	// Create an LRS client with configuration pointing to the above server.
	nodeID := uuid.New().String()

	configs := map[string]grpctransport.Config{"insecure": {Credentials: insecure.NewBundle()}}
	config := lrsclient.Config{
		Node:             clients.Node{ID: nodeID, UserAgentName: "user-agent", UserAgentVersion: "0.0.0.0"},
		TransportBuilder: grpctransport.NewBuilder(configs),
	}
	client, err := lrsclient.New(config)
	if err != nil {
		t.Fatalf("lrsclient.New() failed: %v", err)
	}

	// Call the load reporting API, and ensure that an LRS stream is created.
	serverIdentifier := clients.ServerIdentifier{ServerURI: mgmtServer.Address, Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}}
	loadStore1, err := client.ReportLoad(serverIdentifier)
	if err != nil {
		t.Fatalf("client.ReportLoad() failed: %v", err)
	}
	lrsServer := mgmtServer.LRSServer
	if _, err := lrsServer.LRSStreamOpenChan.Receive(ctx); err != nil {
		t.Fatalf("Timeout when waiting for LRS stream to be created: %v", err)
	}

	// Push some loads on the received store.
	loadStore1.ReporterForCluster("cluster1", "eds1").CallDropped("test")
	loadStore1.ReporterForCluster("cluster1", "eds1").CallStarted(testLocality1)
	loadStore1.ReporterForCluster("cluster1", "eds1").CallServerLoad(testLocality1, testKey1, 3.14)
	loadStore1.ReporterForCluster("cluster1", "eds1").CallServerLoad(testLocality1, testKey1, 2.718)
	loadStore1.ReporterForCluster("cluster1", "eds1").CallFinished(testLocality1, nil)
	loadStore1.ReporterForCluster("cluster1", "eds1").CallStarted(testLocality2)
	loadStore1.ReporterForCluster("cluster1", "eds1").CallServerLoad(testLocality2, testKey2, 1.618)
	loadStore1.ReporterForCluster("cluster1", "eds1").CallFinished(testLocality2, nil)

	// Ensure the initial load reporting request is received at the server.
	req, err := lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatalf("Timeout when waiting for initial LRS request: %v", err)
	}
	gotInitialReq := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest)
	nodeProto := &v3corepb.Node{
		Id:                   nodeID,
		UserAgentName:        "user-agent",
		UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: "0.0.0.0"},
		ClientFeatures:       []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw", "envoy.lrs.supports_send_all_clusters"},
	}
	wantInitialReq := &v3lrspb.LoadStatsRequest{Node: nodeProto}
	if diff := cmp.Diff(gotInitialReq, wantInitialReq, protocmp.Transform()); diff != "" {
		t.Fatalf("Unexpected diff in initial LRS request (-got, +want):\n%s", diff)
	}

	// Send a response from the server with a small deadline.
	lrsServer.LRSResponseChan <- &fakeserver.Response{
		Resp: &v3lrspb.LoadStatsResponse{
			SendAllClusters:       true,
			LoadReportingInterval: &durationpb.Duration{Nanos: 50000000}, // 50ms
		},
	}

	// Ensure that loads are seen on the server.
	req, err = lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatal("Timeout when waiting for LRS request with loads")
	}
	gotLoad := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest).ClusterStats
	if l := len(gotLoad); l != 1 {
		t.Fatalf("Received load for %d clusters, want 1", l)
	}

	// This field is set by the client to indicate the actual time elapsed since
	// the last report was sent. We cannot deterministically compare this, and
	// we cannot use the cmpopts.IgnoreFields() option on proto structs, since
	// we already use the protocmp.Transform() which marshals the struct into
	// another message. Hence setting this field to nil is the best option here.
	gotLoad[0].LoadReportInterval = nil
	wantLoad := &v3endpointpb.ClusterStats{
		ClusterName:          "cluster1",
		ClusterServiceName:   "eds1",
		TotalDroppedRequests: 1,
		DroppedRequests:      []*v3endpointpb.ClusterStats_DroppedRequests{{Category: "test", DroppedCount: 1}},
		UpstreamLocalityStats: []*v3endpointpb.UpstreamLocalityStats{
			{
				Locality: &v3corepb.Locality{Region: "test-region1"},
				LoadMetricStats: []*v3endpointpb.EndpointLoadMetricStats{
					// TotalMetricValue is the aggregation of 3.14 + 2.718 = 5.858
					{MetricName: testKey1, NumRequestsFinishedWithMetric: 2, TotalMetricValue: 5.858}},
				TotalSuccessfulRequests: 1,
				TotalIssuedRequests:     1,
			},
			{
				Locality: &v3corepb.Locality{Region: "test-region2"},
				LoadMetricStats: []*v3endpointpb.EndpointLoadMetricStats{
					{MetricName: testKey2, NumRequestsFinishedWithMetric: 1, TotalMetricValue: 1.618}},
				TotalSuccessfulRequests: 1,
				TotalIssuedRequests:     1,
			},
		},
	}
	// NOTE(review): arguments are (want, got), so the diff is actually
	// (-want, +got); the label in the message below is reversed.
	if diff := cmp.Diff(wantLoad, gotLoad[0], protocmp.Transform(), toleranceCmpOpt, ignoreOrderCmpOpt); diff != "" {
		t.Fatalf("Unexpected diff in LRS request (-got, +want):\n%s", diff)
	}

	// Make another call to the load reporting API, and ensure that a new LRS
	// stream is not created.
	loadStore2, err := client.ReportLoad(serverIdentifier)
	if err != nil {
		t.Fatalf("client.ReportLoad() failed: %v", err)
	}
	sCtx, sCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
	defer sCancel()
	if _, err := lrsServer.LRSStreamOpenChan.Receive(sCtx); err != context.DeadlineExceeded {
		t.Fatal("New LRS stream created when expected to use an existing one")
	}

	// Push more loads.
	loadStore2.ReporterForCluster("cluster2", "eds2").CallDropped("test")

	// Ensure that loads are seen on the server. We need a loop here because
	// there could have been some requests from the client in the time between
	// us reading the first request and now. Those would have been queued in the
	// request channel that we read out of.
	for {
		if ctx.Err() != nil {
			t.Fatalf("Timeout when waiting for new loads to be seen on the server")
		}

		req, err = lrsServer.LRSRequestChan.Receive(ctx)
		if err != nil {
			continue
		}
		gotLoad = req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest).ClusterStats
		if l := len(gotLoad); l != 1 {
			continue
		}
		gotLoad[0].LoadReportInterval = nil
		wantLoad := &v3endpointpb.ClusterStats{
			ClusterName:          "cluster2",
			ClusterServiceName:   "eds2",
			TotalDroppedRequests: 1,
			DroppedRequests:      []*v3endpointpb.ClusterStats_DroppedRequests{{Category: "test", DroppedCount: 1}},
		}
		if diff := cmp.Diff(wantLoad, gotLoad[0], protocmp.Transform()); diff != "" {
			t.Logf("Unexpected diff in LRS request (-got, +want):\n%s", diff)
			continue
		}
		break
	}

	// Cancel the first load reporting call, and ensure that the stream does not
	// close (because we have another call open).
	ssCtx, ssCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
	defer ssCancel()
	loadStore1.Stop(ssCtx)
	sCtx, sCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout)
	defer sCancel()
	if _, err := lrsServer.LRSStreamCloseChan.Receive(sCtx); err != context.DeadlineExceeded {
		t.Fatal("LRS stream closed when expected to stay open")
	}

	// Stop the second load reporting call, and ensure the stream is closed.
	ssCtx, ssCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout)
	defer ssCancel()
	loadStore2.Stop(ssCtx)
	if _, err := lrsServer.LRSStreamCloseChan.Receive(ctx); err != nil {
		t.Fatal("Timeout waiting for LRS stream to close")
	}

	// Calling the load reporting API again should result in the creation of a
	// new LRS stream. This ensures that creating and closing multiple streams
	// works smoothly.
	loadStore3, err := client.ReportLoad(serverIdentifier)
	if err != nil {
		t.Fatalf("client.ReportLoad() failed: %v", err)
	}
	if _, err := lrsServer.LRSStreamOpenChan.Receive(ctx); err != nil {
		t.Fatalf("Timeout when waiting for LRS stream to be created: %v", err)
	}
	ssCtx, ssCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout)
	defer ssCancel()
	loadStore3.Stop(ssCtx)
}

// TestReportLoad_StopWithContext tests the behavior of LoadStore.Stop() when
// called with a context. It verifies that:
//   - Stop() blocks until the context expires or final load send attempt is
//     made.
//   - Final load report is seen on the server after stop is called.
//   - The stream is closed after Stop() returns.
func (s) TestReportLoad_StopWithContext(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Create a management server that serves LRS.
	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{SupportLoadReportingService: true})

	// Create an LRS client with configuration pointing to the above server.
	nodeID := uuid.New().String()

	configs := map[string]grpctransport.Config{"insecure": {Credentials: insecure.NewBundle()}}
	config := lrsclient.Config{
		Node:             clients.Node{ID: nodeID, UserAgentName: "user-agent", UserAgentVersion: "0.0.0.0"},
		TransportBuilder: grpctransport.NewBuilder(configs),
	}
	client, err := lrsclient.New(config)
	if err != nil {
		t.Fatalf("lrsclient.New() failed: %v", err)
	}

	// Call the load reporting API, and ensure that an LRS stream is created.
	serverIdentifier := clients.ServerIdentifier{ServerURI: mgmtServer.Address, Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}}
	loadStore, err := client.ReportLoad(serverIdentifier)
	if err != nil {
		t.Fatalf("client.ReportLoad() failed: %v", err)
	}
	lrsServer := mgmtServer.LRSServer
	if _, err := lrsServer.LRSStreamOpenChan.Receive(ctx); err != nil {
		t.Fatalf("Timeout when waiting for LRS stream to be created: %v", err)
	}

	// Push some loads on the received store.
	loadStore.ReporterForCluster("cluster1", "eds1").CallDropped("test")

	// Ensure the initial load reporting request is received at the server.
	req, err := lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatalf("Timeout when waiting for initial LRS request: %v", err)
	}
	gotInitialReq := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest)
	nodeProto := &v3corepb.Node{
		Id:                   nodeID,
		UserAgentName:        "user-agent",
		UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: "0.0.0.0"},
		ClientFeatures:       []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw", "envoy.lrs.supports_send_all_clusters"},
	}
	wantInitialReq := &v3lrspb.LoadStatsRequest{Node: nodeProto}
	if diff := cmp.Diff(gotInitialReq, wantInitialReq, protocmp.Transform()); diff != "" {
		t.Fatalf("Unexpected diff in initial LRS request (-got, +want):\n%s", diff)
	}

	// Send a response from the server with a small deadline.
	lrsServer.LRSResponseChan <- &fakeserver.Response{
		Resp: &v3lrspb.LoadStatsResponse{
			SendAllClusters:       true,
			LoadReportingInterval: &durationpb.Duration{Nanos: 50000000}, // 50ms
		},
	}

	// Ensure that loads are seen on the server.
	req, err = lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatal("Timeout when waiting for LRS request with loads")
	}
	gotLoad := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest).ClusterStats
	if l := len(gotLoad); l != 1 {
		t.Fatalf("Received load for %d clusters, want 1", l)
	}

	// This field is set by the client to indicate the actual time elapsed since
	// the last report was sent. We cannot deterministically compare this, and
	// we cannot use the cmpopts.IgnoreFields() option on proto structs, since
	// we already use the protocmp.Transform() which marshals the struct into
	// another message. Hence setting this field to nil is the best option here.
	gotLoad[0].LoadReportInterval = nil
	wantLoad := &v3endpointpb.ClusterStats{
		ClusterName:          "cluster1",
		ClusterServiceName:   "eds1",
		TotalDroppedRequests: 1,
		DroppedRequests:      []*v3endpointpb.ClusterStats_DroppedRequests{{Category: "test", DroppedCount: 1}},
	}
	if diff := cmp.Diff(wantLoad, gotLoad[0], protocmp.Transform(), toleranceCmpOpt, ignoreOrderCmpOpt); diff != "" {
		t.Fatalf("Unexpected diff in LRS request (-got, +want):\n%s", diff)
	}

	// Create a context for Stop() that remains until the end of test to ensure
	// that only possibility of Stop()s to finish is if final load send attempt
	// is made. If final load attempt is not made, test will timeout.
	stopCtx, stopCancel := context.WithCancel(ctx)
	defer stopCancel()

	// Push more loads.
	loadStore.ReporterForCluster("cluster2", "eds2").CallDropped("test")

	stopCalled := make(chan struct{})
	// Call Stop in a separate goroutine. It will block until
	// final load send attempt is made.
	go func() {
		loadStore.Stop(stopCtx)
		close(stopCalled)
	}()
	<-stopCalled

	// Ensure that loads are seen on the server. We need a loop here because
	// there could have been some requests from the client in the time between
	// us reading the first request and now. Those would have been queued in the
	// request channel that we read out of.
	for {
		if ctx.Err() != nil {
			t.Fatalf("Timeout when waiting for new loads to be seen on the server")
		}

		req, err = lrsServer.LRSRequestChan.Receive(ctx)
		if err != nil || req.(*fakeserver.Request).Err != nil {
			continue
		}
		if req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest) == nil {
			// This can happen due to a race:
			// 1. Load for "cluster2" is reported just before Stop().
			// 2. The periodic ticker might send this load before Stop()'s
			//    final send mechanism processes it, clearing the data.
			// 3. Stop()'s final send might then send an empty report.
			// This is acceptable for this test because we only need to verify
			// if the final load report send attempt was made.
			t.Logf("Empty final load report sent on server")
			break
		}
		gotLoad = req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest).ClusterStats
		if l := len(gotLoad); l != 1 {
			continue
		}
		gotLoad[0].LoadReportInterval = nil
		wantLoad := &v3endpointpb.ClusterStats{
			ClusterName:          "cluster2",
			ClusterServiceName:   "eds2",
			TotalDroppedRequests: 1,
			DroppedRequests:      []*v3endpointpb.ClusterStats_DroppedRequests{{Category: "test", DroppedCount: 1}},
		}
		if diff := cmp.Diff(wantLoad, gotLoad[0], protocmp.Transform()); diff != "" {
			t.Logf("Unexpected diff in LRS request (-got, +want):\n%s", diff)
			continue
		}
		break
	}

	// Verify the stream is eventually closed on the server side.
	if _, err := lrsServer.LRSStreamCloseChan.Receive(ctx); err != nil {
		t.Fatal("Timeout waiting for LRS stream to close")
	}
}

// TestReportLoad_LoadReportInterval verifies that the load report interval
// received by the LRS server is the duration between start of last load
// reporting by the client and the time when the load is reported to the LRS
// server.
func (s) TestReportLoad_LoadReportInterval(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Override the injectable clock used by the lrsclient package, and restore
	// the real one when the test finishes.
	originalTimeNow := lrsclientinternal.TimeNow
	t.Cleanup(func() { lrsclientinternal.TimeNow = originalTimeNow })

	// Create a management server that serves LRS.
	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{SupportLoadReportingService: true})

	// Create an LRS client with configuration pointing to the above server.
	nodeID := uuid.New().String()

	configs := map[string]grpctransport.Config{"insecure": {Credentials: insecure.NewBundle()}}
	config := lrsclient.Config{
		Node:             clients.Node{ID: nodeID, UserAgentName: "user-agent", UserAgentVersion: "0.0.0.0"},
		TransportBuilder: grpctransport.NewBuilder(configs),
	}
	client, err := lrsclient.New(config)
	if err != nil {
		t.Fatalf("lrsclient.New() failed: %v", err)
	}

	// Call the load reporting API, and ensure that an LRS stream is created.
	serverIdentifier := clients.ServerIdentifier{ServerURI: mgmtServer.Address, Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}}
	loadStore1, err := client.ReportLoad(serverIdentifier)
	if err != nil {
		t.Fatalf("client.ReportLoad() failed: %v", err)
	}
	lrsServer := mgmtServer.LRSServer
	if _, err := lrsServer.LRSStreamOpenChan.Receive(ctx); err != nil {
		t.Fatalf("Timeout when waiting for LRS stream to be created: %v", err)
	}

	// Initial time for reporter creation. The fake clock returns a fixed
	// instant so the reported interval is fully deterministic.
	currentTime := time.Now()
	lrsclientinternal.TimeNow = func() time.Time {
		return currentTime
	}

	// Report dummy drop to ensure stats is not nil.
	loadStore1.ReporterForCluster("cluster1", "eds1").CallDropped("test")

	// Update currentTime to simulate the passage of time between the reporter
	// creation and first stats() call.
	currentTime = currentTime.Add(5 * time.Second)

	// Ensure the initial load reporting request is received at the server.
	req, err := lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatalf("Timeout when waiting for initial LRS request: %v", err)
	}
	gotInitialReq := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest)
	nodeProto := &v3corepb.Node{
		Id:                   nodeID,
		UserAgentName:        "user-agent",
		UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: "0.0.0.0"},
		ClientFeatures:       []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw", "envoy.lrs.supports_send_all_clusters"},
	}
	wantInitialReq := &v3lrspb.LoadStatsRequest{Node: nodeProto}
	if diff := cmp.Diff(gotInitialReq, wantInitialReq, protocmp.Transform()); diff != "" {
		t.Fatalf("Unexpected diff in initial LRS request (-got, +want):\n%s", diff)
	}

	// Send a response from the server with a small deadline.
	lrsServer.LRSResponseChan <- &fakeserver.Response{
		Resp: &v3lrspb.LoadStatsResponse{
			SendAllClusters:       true,
			LoadReportingInterval: &durationpb.Duration{Nanos: 50000000}, // 50ms
		},
	}

	// Ensure that loads are seen on the server.
	req, err = lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatal("Timeout when waiting for LRS request with loads")
	}
	gotLoad := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest).ClusterStats
	if l := len(gotLoad); l != 1 {
		t.Fatalf("Received load for %d clusters, want 1", l)
	}
	// Verify load received at LRS server has load report interval calculated
	// from the time of reporter creation.
	if got, want := gotLoad[0].GetLoadReportInterval().AsDuration(), 5*time.Second; got != want {
		t.Errorf("Got load report interval %v, want %v", got, want)
	}

	// Best-effort stop with a tiny deadline; the test does not depend on the
	// final flush completing.
	ssCtx, ssCancel := context.WithTimeout(context.Background(), time.Millisecond)
	defer ssCancel()
	loadStore1.Stop(ssCtx)
}