/*
 *
 * Copyright 2024 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package lrsclient_test

import (
	"context"
	"net"
	"testing"
	"time"

	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"
	"github.com/google/uuid"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/internal/grpctest"
	"google.golang.org/grpc/status"
	"google.golang.org/grpc/xds/internal/clients"
	"google.golang.org/grpc/xds/internal/clients/grpctransport"
	"google.golang.org/grpc/xds/internal/clients/internal/testutils"
	"google.golang.org/grpc/xds/internal/clients/internal/testutils/e2e"
	"google.golang.org/grpc/xds/internal/clients/internal/testutils/fakeserver"
	"google.golang.org/grpc/xds/internal/clients/lrsclient"
	lrsclientinternal "google.golang.org/grpc/xds/internal/clients/lrsclient/internal"
	"google.golang.org/protobuf/testing/protocmp"
	"google.golang.org/protobuf/types/known/durationpb"

	v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
	v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
	v3lrspb "github.com/envoyproxy/go-control-plane/envoy/service/load_stats/v3"
)

// s embeds grpctest.Tester so that all (s) test methods in this file are run
// through the gRPC test framework via RunSubTests below.
type s struct {
	grpctest.Tester
}

func Test(t *testing.T) {
	grpctest.RunSubTests(t, s{})
}

const (
	// Arbitrary server-load metric keys used when reporting per-locality loads.
	testKey1 = "test-key1"
	testKey2 = "test-key2"

	defaultTestWatchExpiryTimeout = 100 * time.Millisecond
	defaultTestTimeout            = 5 * time.Second
	defaultTestShortTimeout       = 10 * time.Millisecond // For events expected to *not* happen.
)

var (
	testLocality1 = clients.Locality{Region: "test-region1"}
	testLocality2 = clients.Locality{Region: "test-region2"}

	// toleranceCmpOpt allows a small absolute error when comparing
	// floating-point metric values in load reports.
	toleranceCmpOpt = cmpopts.EquateApprox(0, 1e-5)

	// ignoreOrderCmpOpt compares the upstream_locality_stats field of
	// ClusterStats without regard to element order, since the client does not
	// guarantee a reporting order across localities.
	ignoreOrderCmpOpt = protocmp.FilterField(&v3endpointpb.ClusterStats{}, "upstream_locality_stats",
		cmpopts.SortSlices(func(a, b protocmp.Message) bool {
			return a.String() < b.String()
		}),
	)
)

// wrappedListener wraps a net.Listener and signals on newConnChan every time
// a new connection is accepted, letting tests observe connection attempts.
type wrappedListener struct {
	net.Listener
	newConnChan *testutils.Channel // Connection attempts are pushed here.
}

func (wl *wrappedListener) Accept() (net.Conn, error) {
	c, err := wl.Listener.Accept()
	if err != nil {
		return nil, err
	}
	wl.newConnChan.Send(struct{}{})
	return c, err
}

// Tests a load reporting scenario where the LRS client is reporting loads to
// multiple servers. Verifies the following:
//   - calling the load reporting API with different server configuration
//     results in connections being created to those corresponding servers
//   - the same load.Store is not returned when the load reporting API called
//     with different server configurations
//   - canceling the load reporting from the client results in the LRS stream
//     being canceled on the server
func (s) TestReportLoad_ConnectionCreation(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Create two management servers that also serve LRS.
	l, err := net.Listen("tcp", "localhost:0")
	if err != nil {
		t.Fatalf("net.Listen() failed: %v", err)
	}
	newConnChan1 := testutils.NewChannelWithSize(1)
	lis1 := &wrappedListener{
		Listener:    l,
		newConnChan: newConnChan1,
	}
	mgmtServer1 := e2e.StartManagementServer(t, e2e.ManagementServerOptions{
		Listener:                    lis1,
		SupportLoadReportingService: true,
	})
	l, err = net.Listen("tcp", "localhost:0")
	if err != nil {
		t.Fatalf("net.Listen() failed: %v", err)
	}
	newConnChan2 := testutils.NewChannelWithSize(1)
	lis2 := &wrappedListener{
		Listener:    l,
		newConnChan: newConnChan2,
	}
	mgmtServer2 := e2e.StartManagementServer(t, e2e.ManagementServerOptions{
		Listener:                    lis2,
		SupportLoadReportingService: true,
	})

	// Create an LRS client with a configuration that contains both of
	// the above two servers. The authority name is immaterial here since load
	// reporting is per-server and not per-authority.
	nodeID := uuid.New().String()

	configs := map[string]grpctransport.Config{"insecure": {Credentials: insecure.NewBundle()}}
	config := lrsclient.Config{
		Node:             clients.Node{ID: nodeID, UserAgentName: "user-agent", UserAgentVersion: "0.0.0.0"},
		TransportBuilder: grpctransport.NewBuilder(configs),
	}
	client, err := lrsclient.New(config)
	if err != nil {
		t.Fatalf("lrsclient.New() failed: %v", err)
	}

	serverIdentifier1 := clients.ServerIdentifier{ServerURI: mgmtServer1.Address, Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}}
	loadStore1, err := client.ReportLoad(serverIdentifier1)
	if err != nil {
		t.Fatalf("client.ReportLoad() failed: %v", err)
	}
	// The deliberately-short context makes the deferred Stop a best-effort
	// final flush at test teardown rather than a blocking wait.
	ssCtx, ssCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
	defer ssCancel()
	defer loadStore1.Stop(ssCtx)

	// Call the load reporting API to report load to the first management
	// server, and ensure that a connection to the server is created.
	if _, err := newConnChan1.Receive(ctx); err != nil {
		t.Fatal("Timeout when waiting for a connection to the first management server, after starting load reporting")
	}
	if _, err := mgmtServer1.LRSServer.LRSStreamOpenChan.Receive(ctx); err != nil {
		t.Fatal("Timeout when waiting for LRS stream to be created")
	}

	// Call the load reporting API to report load to the second management
	// server, and ensure that a connection to the server is created.
	serverIdentifier2 := clients.ServerIdentifier{ServerURI: mgmtServer2.Address, Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}}
	loadStore2, err := client.ReportLoad(serverIdentifier2)
	if err != nil {
		t.Fatalf("client.ReportLoad() failed: %v", err)
	}
	if _, err := newConnChan2.Receive(ctx); err != nil {
		t.Fatal("Timeout when waiting for a connection to the second management server, after starting load reporting")
	}
	if _, err := mgmtServer2.LRSServer.LRSStreamOpenChan.Receive(ctx); err != nil {
		t.Fatal("Timeout when waiting for LRS stream to be created")
	}

	if loadStore1 == loadStore2 {
		t.Fatalf("Got same store for different servers, want different")
	}

	// Push some loads on the received store.
	loadStore2.ReporterForCluster("cluster", "eds").CallDropped("test")

	// Ensure the initial load reporting request is received at the server.
	lrsServer := mgmtServer2.LRSServer
	req, err := lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatalf("Timeout when waiting for initial LRS request: %v", err)
	}
	gotInitialReq := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest)
	nodeProto := &v3corepb.Node{
		Id:                   nodeID,
		UserAgentName:        "user-agent",
		UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: "0.0.0.0"},
		ClientFeatures:       []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw", "envoy.lrs.supports_send_all_clusters"},
	}
	wantInitialReq := &v3lrspb.LoadStatsRequest{Node: nodeProto}
	if diff := cmp.Diff(gotInitialReq, wantInitialReq, protocmp.Transform()); diff != "" {
		t.Fatalf("Unexpected diff in initial LRS request (-got, +want):\n%s", diff)
	}

	// Send a response from the server with a small deadline.
	lrsServer.LRSResponseChan <- &fakeserver.Response{
		Resp: &v3lrspb.LoadStatsResponse{
			SendAllClusters:       true,
			LoadReportingInterval: &durationpb.Duration{Nanos: 50000000}, // 50ms
		},
	}

	// Ensure that loads are seen on the server.
	req, err = lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatalf("Timeout when waiting for LRS request with loads: %v", err)
	}
	gotLoad := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest).ClusterStats
	if l := len(gotLoad); l != 1 {
		t.Fatalf("Received load for %d clusters, want 1", l)
	}

	// This field is set by the client to indicate the actual time elapsed since
	// the last report was sent. We cannot deterministically compare this, and
	// we cannot use the cmpopts.IgnoreFields() option on proto structs, since
	// we already use the protocmp.Transform() which marshals the struct into
	// another message. Hence setting this field to nil is the best option here.
	gotLoad[0].LoadReportInterval = nil
	wantLoad := &v3endpointpb.ClusterStats{
		ClusterName:          "cluster",
		ClusterServiceName:   "eds",
		TotalDroppedRequests: 1,
		DroppedRequests:      []*v3endpointpb.ClusterStats_DroppedRequests{{Category: "test", DroppedCount: 1}},
	}
	// NOTE(review): the arguments here are (want, got), so the diff produced
	// is actually (-want, +got); the label in the message below is reversed.
	if diff := cmp.Diff(wantLoad, gotLoad[0], protocmp.Transform(), toleranceCmpOpt, ignoreOrderCmpOpt); diff != "" {
		t.Fatalf("Unexpected diff in LRS request (-got, +want):\n%s", diff)
	}

	// Stop this load reporting stream, server should see error canceled.
	ssCtx, ssCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout)
	defer ssCancel()
	loadStore2.Stop(ssCtx)

	// Server should receive a stream canceled error. There may be additional
	// load reports from the client in the channel.
	for {
		if ctx.Err() != nil {
			t.Fatal("Timeout when waiting for the LRS stream to be canceled on the server")
		}
		u, err := lrsServer.LRSRequestChan.Receive(ctx)
		if err != nil {
			continue
		}
		// Ignore load reports sent before the stream was cancelled.
		if u.(*fakeserver.Request).Err == nil {
			continue
		}
		if status.Code(u.(*fakeserver.Request).Err) != codes.Canceled {
			t.Fatalf("Unexpected LRS request: %v, want error canceled", u)
		}
		break
	}
}

// Tests a load reporting scenario where the load reporting API is called
// multiple times for the same server. The test verifies the following:
//   - calling the load reporting API the second time for the same server
//     configuration does not create a new LRS stream
//   - the LRS stream is closed *only* after all the API calls invoke their
//     cancel functions
//   - creating new streams after the previous one was closed works
func (s) TestReportLoad_StreamCreation(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Create a management server that serves LRS.
	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{SupportLoadReportingService: true})

	// Create an LRS client with configuration pointing to the above server.
	nodeID := uuid.New().String()

	configs := map[string]grpctransport.Config{"insecure": {Credentials: insecure.NewBundle()}}
	config := lrsclient.Config{
		Node:             clients.Node{ID: nodeID, UserAgentName: "user-agent", UserAgentVersion: "0.0.0.0"},
		TransportBuilder: grpctransport.NewBuilder(configs),
	}
	client, err := lrsclient.New(config)
	if err != nil {
		t.Fatalf("lrsclient.New() failed: %v", err)
	}

	// Call the load reporting API, and ensure that an LRS stream is created.
	serverIdentifier := clients.ServerIdentifier{ServerURI: mgmtServer.Address, Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}}
	loadStore1, err := client.ReportLoad(serverIdentifier)
	if err != nil {
		t.Fatalf("client.ReportLoad() failed: %v", err)
	}
	lrsServer := mgmtServer.LRSServer
	if _, err := lrsServer.LRSStreamOpenChan.Receive(ctx); err != nil {
		t.Fatalf("Timeout when waiting for LRS stream to be created: %v", err)
	}

	// Push some loads on the received store.
	loadStore1.ReporterForCluster("cluster1", "eds1").CallDropped("test")
	loadStore1.ReporterForCluster("cluster1", "eds1").CallStarted(testLocality1)
	loadStore1.ReporterForCluster("cluster1", "eds1").CallServerLoad(testLocality1, testKey1, 3.14)
	loadStore1.ReporterForCluster("cluster1", "eds1").CallServerLoad(testLocality1, testKey1, 2.718)
	loadStore1.ReporterForCluster("cluster1", "eds1").CallFinished(testLocality1, nil)
	loadStore1.ReporterForCluster("cluster1", "eds1").CallStarted(testLocality2)
	loadStore1.ReporterForCluster("cluster1", "eds1").CallServerLoad(testLocality2, testKey2, 1.618)
	loadStore1.ReporterForCluster("cluster1", "eds1").CallFinished(testLocality2, nil)

	// Ensure the initial load reporting request is received at the server.
	req, err := lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatalf("Timeout when waiting for initial LRS request: %v", err)
	}
	gotInitialReq := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest)
	nodeProto := &v3corepb.Node{
		Id:                   nodeID,
		UserAgentName:        "user-agent",
		UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: "0.0.0.0"},
		ClientFeatures:       []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw", "envoy.lrs.supports_send_all_clusters"},
	}
	wantInitialReq := &v3lrspb.LoadStatsRequest{Node: nodeProto}
	if diff := cmp.Diff(gotInitialReq, wantInitialReq, protocmp.Transform()); diff != "" {
		t.Fatalf("Unexpected diff in initial LRS request (-got, +want):\n%s", diff)
	}

	// Send a response from the server with a small deadline.
	lrsServer.LRSResponseChan <- &fakeserver.Response{
		Resp: &v3lrspb.LoadStatsResponse{
			SendAllClusters:       true,
			LoadReportingInterval: &durationpb.Duration{Nanos: 50000000}, // 50ms
		},
	}

	// Ensure that loads are seen on the server.
	req, err = lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatal("Timeout when waiting for LRS request with loads")
	}
	gotLoad := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest).ClusterStats
	if l := len(gotLoad); l != 1 {
		t.Fatalf("Received load for %d clusters, want 1", l)
	}

	// This field is set by the client to indicate the actual time elapsed since
	// the last report was sent. We cannot deterministically compare this, and
	// we cannot use the cmpopts.IgnoreFields() option on proto structs, since
	// we already use the protocmp.Transform() which marshals the struct into
	// another message. Hence setting this field to nil is the best option here.
	gotLoad[0].LoadReportInterval = nil
	wantLoad := &v3endpointpb.ClusterStats{
		ClusterName:          "cluster1",
		ClusterServiceName:   "eds1",
		TotalDroppedRequests: 1,
		DroppedRequests:      []*v3endpointpb.ClusterStats_DroppedRequests{{Category: "test", DroppedCount: 1}},
		UpstreamLocalityStats: []*v3endpointpb.UpstreamLocalityStats{
			{
				Locality: &v3corepb.Locality{Region: "test-region1"},
				LoadMetricStats: []*v3endpointpb.EndpointLoadMetricStats{
					// TotalMetricValue is the aggregation of 3.14 + 2.718 = 5.858
					{MetricName: testKey1, NumRequestsFinishedWithMetric: 2, TotalMetricValue: 5.858}},
				TotalSuccessfulRequests: 1,
				TotalIssuedRequests:     1,
			},
			{
				Locality: &v3corepb.Locality{Region: "test-region2"},
				LoadMetricStats: []*v3endpointpb.EndpointLoadMetricStats{
					{MetricName: testKey2, NumRequestsFinishedWithMetric: 1, TotalMetricValue: 1.618}},
				TotalSuccessfulRequests: 1,
				TotalIssuedRequests:     1,
			},
		},
	}
	// NOTE(review): arguments are (want, got), so the diff is actually
	// (-want, +got); the label in the message below is reversed.
	if diff := cmp.Diff(wantLoad, gotLoad[0], protocmp.Transform(), toleranceCmpOpt, ignoreOrderCmpOpt); diff != "" {
		t.Fatalf("Unexpected diff in LRS request (-got, +want):\n%s", diff)
	}

	// Make another call to the load reporting API, and ensure that a new LRS
	// stream is not created.
	loadStore2, err := client.ReportLoad(serverIdentifier)
	if err != nil {
		t.Fatalf("client.ReportLoad() failed: %v", err)
	}
	sCtx, sCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
	defer sCancel()
	if _, err := lrsServer.LRSStreamOpenChan.Receive(sCtx); err != context.DeadlineExceeded {
		t.Fatal("New LRS stream created when expected to use an existing one")
	}

	// Push more loads.
	loadStore2.ReporterForCluster("cluster2", "eds2").CallDropped("test")

	// Ensure that loads are seen on the server. We need a loop here because
	// there could have been some requests from the client in the time between
	// us reading the first request and now. Those would have been queued in the
	// request channel that we read out of.
	for {
		if ctx.Err() != nil {
			t.Fatalf("Timeout when waiting for new loads to be seen on the server")
		}

		req, err = lrsServer.LRSRequestChan.Receive(ctx)
		if err != nil {
			continue
		}
		gotLoad = req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest).ClusterStats
		if l := len(gotLoad); l != 1 {
			continue
		}
		gotLoad[0].LoadReportInterval = nil
		wantLoad := &v3endpointpb.ClusterStats{
			ClusterName:          "cluster2",
			ClusterServiceName:   "eds2",
			TotalDroppedRequests: 1,
			DroppedRequests:      []*v3endpointpb.ClusterStats_DroppedRequests{{Category: "test", DroppedCount: 1}},
		}
		if diff := cmp.Diff(wantLoad, gotLoad[0], protocmp.Transform()); diff != "" {
			t.Logf("Unexpected diff in LRS request (-got, +want):\n%s", diff)
			continue
		}
		break
	}

	// Cancel the first load reporting call, and ensure that the stream does not
	// close (because we have another call open).
	ssCtx, ssCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
	defer ssCancel()
	loadStore1.Stop(ssCtx)
	sCtx, sCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout)
	defer sCancel()
	if _, err := lrsServer.LRSStreamCloseChan.Receive(sCtx); err != context.DeadlineExceeded {
		t.Fatal("LRS stream closed when expected to stay open")
	}

	// Stop the second load reporting call, and ensure the stream is closed.
	ssCtx, ssCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout)
	defer ssCancel()
	loadStore2.Stop(ssCtx)
	if _, err := lrsServer.LRSStreamCloseChan.Receive(ctx); err != nil {
		t.Fatal("Timeout waiting for LRS stream to close")
	}

	// Calling the load reporting API again should result in the creation of a
	// new LRS stream. This ensures that creating and closing multiple streams
	// works smoothly.
	loadStore3, err := client.ReportLoad(serverIdentifier)
	if err != nil {
		t.Fatalf("client.ReportLoad() failed: %v", err)
	}
	if _, err := lrsServer.LRSStreamOpenChan.Receive(ctx); err != nil {
		t.Fatalf("Timeout when waiting for LRS stream to be created: %v", err)
	}
	ssCtx, ssCancel = context.WithTimeout(context.Background(), defaultTestShortTimeout)
	defer ssCancel()
	loadStore3.Stop(ssCtx)
}

// TestReportLoad_StopWithContext tests the behavior of LoadStore.Stop() when
// called with a context. It verifies that:
//   - Stop() blocks until the context expires or final load send attempt is
//     made.
//   - Final load report is seen on the server after stop is called.
//   - The stream is closed after Stop() returns.
func (s) TestReportLoad_StopWithContext(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Create a management server that serves LRS.
	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{SupportLoadReportingService: true})

	// Create an LRS client with configuration pointing to the above server.
	nodeID := uuid.New().String()

	configs := map[string]grpctransport.Config{"insecure": {Credentials: insecure.NewBundle()}}
	config := lrsclient.Config{
		Node:             clients.Node{ID: nodeID, UserAgentName: "user-agent", UserAgentVersion: "0.0.0.0"},
		TransportBuilder: grpctransport.NewBuilder(configs),
	}
	client, err := lrsclient.New(config)
	if err != nil {
		t.Fatalf("lrsclient.New() failed: %v", err)
	}

	// Call the load reporting API, and ensure that an LRS stream is created.
	serverIdentifier := clients.ServerIdentifier{ServerURI: mgmtServer.Address, Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}}
	loadStore, err := client.ReportLoad(serverIdentifier)
	if err != nil {
		t.Fatalf("client.ReportLoad() failed: %v", err)
	}
	lrsServer := mgmtServer.LRSServer
	if _, err := lrsServer.LRSStreamOpenChan.Receive(ctx); err != nil {
		t.Fatalf("Timeout when waiting for LRS stream to be created: %v", err)
	}

	// Push some loads on the received store.
	loadStore.ReporterForCluster("cluster1", "eds1").CallDropped("test")

	// Ensure the initial load reporting request is received at the server.
	req, err := lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatalf("Timeout when waiting for initial LRS request: %v", err)
	}
	gotInitialReq := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest)
	nodeProto := &v3corepb.Node{
		Id:                   nodeID,
		UserAgentName:        "user-agent",
		UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: "0.0.0.0"},
		ClientFeatures:       []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw", "envoy.lrs.supports_send_all_clusters"},
	}
	wantInitialReq := &v3lrspb.LoadStatsRequest{Node: nodeProto}
	if diff := cmp.Diff(gotInitialReq, wantInitialReq, protocmp.Transform()); diff != "" {
		t.Fatalf("Unexpected diff in initial LRS request (-got, +want):\n%s", diff)
	}

	// Send a response from the server with a small deadline.
	lrsServer.LRSResponseChan <- &fakeserver.Response{
		Resp: &v3lrspb.LoadStatsResponse{
			SendAllClusters:       true,
			LoadReportingInterval: &durationpb.Duration{Nanos: 50000000}, // 50ms
		},
	}

	// Ensure that loads are seen on the server.
	req, err = lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatal("Timeout when waiting for LRS request with loads")
	}
	gotLoad := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest).ClusterStats
	if l := len(gotLoad); l != 1 {
		t.Fatalf("Received load for %d clusters, want 1", l)
	}

	// This field is set by the client to indicate the actual time elapsed since
	// the last report was sent. We cannot deterministically compare this, and
	// we cannot use the cmpopts.IgnoreFields() option on proto structs, since
	// we already use the protocmp.Transform() which marshals the struct into
	// another message. Hence setting this field to nil is the best option here.
	gotLoad[0].LoadReportInterval = nil
	wantLoad := &v3endpointpb.ClusterStats{
		ClusterName:          "cluster1",
		ClusterServiceName:   "eds1",
		TotalDroppedRequests: 1,
		DroppedRequests:      []*v3endpointpb.ClusterStats_DroppedRequests{{Category: "test", DroppedCount: 1}},
	}
	if diff := cmp.Diff(wantLoad, gotLoad[0], protocmp.Transform(), toleranceCmpOpt, ignoreOrderCmpOpt); diff != "" {
		t.Fatalf("Unexpected diff in LRS request (-got, +want):\n%s", diff)
	}

	// Create a context for Stop() that remains until the end of test to ensure
	// that only possibility of Stop()s to finish is if final load send attempt
	// is made. If final load attempt is not made, test will timeout.
	stopCtx, stopCancel := context.WithCancel(ctx)
	defer stopCancel()

	// Push more loads.
	loadStore.ReporterForCluster("cluster2", "eds2").CallDropped("test")

	stopCalled := make(chan struct{})
	// Call Stop in a separate goroutine. It will block until
	// final load send attempt is made.
	go func() {
		loadStore.Stop(stopCtx)
		close(stopCalled)
	}()
	<-stopCalled

	// Ensure that loads are seen on the server. We need a loop here because
	// there could have been some requests from the client in the time between
	// us reading the first request and now. Those would have been queued in the
	// request channel that we read out of.
	for {
		if ctx.Err() != nil {
			t.Fatalf("Timeout when waiting for new loads to be seen on the server")
		}

		req, err = lrsServer.LRSRequestChan.Receive(ctx)
		if err != nil || req.(*fakeserver.Request).Err != nil {
			continue
		}
		if req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest) == nil {
			// This can happen due to a race:
			// 1. Load for "cluster2" is reported just before Stop().
			// 2. The periodic ticker might send this load before Stop()'s
			//    final send mechanism processes it, clearing the data.
			// 3. Stop()'s final send might then send an empty report.
			// This is acceptable for this test because we only need to verify
			// if the final load report send attempt was made.
			t.Logf("Empty final load report sent on server")
			break
		}
		gotLoad = req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest).ClusterStats
		if l := len(gotLoad); l != 1 {
			continue
		}
		gotLoad[0].LoadReportInterval = nil
		wantLoad := &v3endpointpb.ClusterStats{
			ClusterName:          "cluster2",
			ClusterServiceName:   "eds2",
			TotalDroppedRequests: 1,
			DroppedRequests:      []*v3endpointpb.ClusterStats_DroppedRequests{{Category: "test", DroppedCount: 1}},
		}
		if diff := cmp.Diff(wantLoad, gotLoad[0], protocmp.Transform()); diff != "" {
			t.Logf("Unexpected diff in LRS request (-got, +want):\n%s", diff)
			continue
		}
		break
	}

	// Verify the stream is eventually closed on the server side.
	if _, err := lrsServer.LRSStreamCloseChan.Receive(ctx); err != nil {
		t.Fatal("Timeout waiting for LRS stream to close")
	}
}

// TestReportLoad_LoadReportInterval verifies that the load report interval
// received by the LRS server is the duration between start of last load
// reporting by the client and the time when the load is reported to the LRS
// server.
func (s) TestReportLoad_LoadReportInterval(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Override the injectable clock used by the lrsclient package, and restore
	// the real one when the test finishes.
	originalTimeNow := lrsclientinternal.TimeNow
	t.Cleanup(func() { lrsclientinternal.TimeNow = originalTimeNow })

	// Create a management server that serves LRS.
	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{SupportLoadReportingService: true})

	// Create an LRS client with configuration pointing to the above server.
	nodeID := uuid.New().String()

	configs := map[string]grpctransport.Config{"insecure": {Credentials: insecure.NewBundle()}}
	config := lrsclient.Config{
		Node:             clients.Node{ID: nodeID, UserAgentName: "user-agent", UserAgentVersion: "0.0.0.0"},
		TransportBuilder: grpctransport.NewBuilder(configs),
	}
	client, err := lrsclient.New(config)
	if err != nil {
		t.Fatalf("lrsclient.New() failed: %v", err)
	}

	// Call the load reporting API, and ensure that an LRS stream is created.
	serverIdentifier := clients.ServerIdentifier{ServerURI: mgmtServer.Address, Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}}
	loadStore1, err := client.ReportLoad(serverIdentifier)
	if err != nil {
		t.Fatalf("client.ReportLoad() failed: %v", err)
	}
	lrsServer := mgmtServer.LRSServer
	if _, err := lrsServer.LRSStreamOpenChan.Receive(ctx); err != nil {
		t.Fatalf("Timeout when waiting for LRS stream to be created: %v", err)
	}

	// Initial time for reporter creation. The fake clock returns a fixed
	// instant so the reported interval is fully deterministic.
	currentTime := time.Now()
	lrsclientinternal.TimeNow = func() time.Time {
		return currentTime
	}

	// Report dummy drop to ensure stats is not nil.
	loadStore1.ReporterForCluster("cluster1", "eds1").CallDropped("test")

	// Update currentTime to simulate the passage of time between the reporter
	// creation and first stats() call.
	currentTime = currentTime.Add(5 * time.Second)

	// Ensure the initial load reporting request is received at the server.
	req, err := lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatalf("Timeout when waiting for initial LRS request: %v", err)
	}
	gotInitialReq := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest)
	nodeProto := &v3corepb.Node{
		Id:                   nodeID,
		UserAgentName:        "user-agent",
		UserAgentVersionType: &v3corepb.Node_UserAgentVersion{UserAgentVersion: "0.0.0.0"},
		ClientFeatures:       []string{"envoy.lb.does_not_support_overprovisioning", "xds.config.resource-in-sotw", "envoy.lrs.supports_send_all_clusters"},
	}
	wantInitialReq := &v3lrspb.LoadStatsRequest{Node: nodeProto}
	if diff := cmp.Diff(gotInitialReq, wantInitialReq, protocmp.Transform()); diff != "" {
		t.Fatalf("Unexpected diff in initial LRS request (-got, +want):\n%s", diff)
	}

	// Send a response from the server with a small deadline.
	lrsServer.LRSResponseChan <- &fakeserver.Response{
		Resp: &v3lrspb.LoadStatsResponse{
			SendAllClusters:       true,
			LoadReportingInterval: &durationpb.Duration{Nanos: 50000000}, // 50ms
		},
	}

	// Ensure that loads are seen on the server.
	req, err = lrsServer.LRSRequestChan.Receive(ctx)
	if err != nil {
		t.Fatal("Timeout when waiting for LRS request with loads")
	}
	gotLoad := req.(*fakeserver.Request).Req.(*v3lrspb.LoadStatsRequest).ClusterStats
	if l := len(gotLoad); l != 1 {
		t.Fatalf("Received load for %d clusters, want 1", l)
	}
	// Verify load received at LRS server has load report interval calculated
	// from the time of reporter creation.
	if got, want := gotLoad[0].GetLoadReportInterval().AsDuration(), 5*time.Second; got != want {
		t.Errorf("Got load report interval %v, want %v", got, want)
	}

	// Best-effort stop with a tiny deadline; the test does not depend on the
	// final flush completing.
	ssCtx, ssCancel := context.WithTimeout(context.Background(), time.Millisecond)
	defer ssCancel()
	loadStore1.Stop(ssCtx)
}