google.golang.org/grpc@v1.72.2/orca/producer_test.go (about) 1 /* 2 * Copyright 2022 gRPC authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package orca_test 18 19 import ( 20 "context" 21 "fmt" 22 "sync" 23 "testing" 24 "time" 25 26 "google.golang.org/grpc" 27 "google.golang.org/grpc/balancer" 28 "google.golang.org/grpc/balancer/roundrobin" 29 "google.golang.org/grpc/codes" 30 "google.golang.org/grpc/connectivity" 31 "google.golang.org/grpc/credentials/insecure" 32 "google.golang.org/grpc/internal/grpctest" 33 "google.golang.org/grpc/internal/testutils" 34 "google.golang.org/grpc/orca" 35 "google.golang.org/grpc/orca/internal" 36 "google.golang.org/grpc/resolver" 37 "google.golang.org/grpc/resolver/manual" 38 "google.golang.org/grpc/status" 39 "google.golang.org/protobuf/proto" 40 41 v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3" 42 v3orcaservicegrpc "github.com/cncf/xds/go/xds/service/orca/v3" 43 v3orcaservicepb "github.com/cncf/xds/go/xds/service/orca/v3" 44 ) 45 46 // customLBB wraps a round robin LB policy but provides a ClientConn wrapper to 47 // add an ORCA OOB report producer for all created SubConns. 48 type customLBB struct{} 49 50 func (customLBB) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { 51 return balancer.Get(roundrobin.Name).Build(&ccWrapper{ClientConn: cc}, opts) 52 } 53 54 func (customLBB) Name() string { return "customLB" } 55 56 func init() { 57 balancer.Register(customLBB{}) 58 } 59 60 type ccWrapper struct { 61 balancer.ClientConn 62 } 63 64 func (w *ccWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) { 65 if len(addrs) != 1 { 66 panic(fmt.Sprintf("got addrs=%v; want len(addrs) == 1", addrs)) 67 } 68 var sc balancer.SubConn 69 opts.StateListener = func(scs balancer.SubConnState) { 70 if scs.ConnectivityState != connectivity.Ready { 71 return 72 } 73 l := getListenerInfo(addrs[0]) 74 l.listener.cleanup = orca.RegisterOOBListener(sc, l.listener, l.opts) 75 l.scChan <- sc 76 } 77 sc, err := w.ClientConn.NewSubConn(addrs, opts) 78 if err != nil { 79 return sc, err 80 } 81 return sc, nil 82 } 83 84 // listenerInfo is stored in an address's attributes to allow ORCA 85 // listeners to be registered on subconns created for that address. 86 type listenerInfo struct { 87 listener *testOOBListener 88 opts orca.OOBListenerOptions 89 scChan chan balancer.SubConn // Pushed on by the LB policy 90 } 91 92 type listenerInfoKey struct{} 93 94 func setListenerInfo(addr resolver.Address, l *listenerInfo) resolver.Address { 95 addr.Attributes = addr.Attributes.WithValue(listenerInfoKey{}, l) 96 return addr 97 } 98 99 func getListenerInfo(addr resolver.Address) *listenerInfo { 100 return addr.Attributes.Value(listenerInfoKey{}).(*listenerInfo) 101 } 102 103 // testOOBListener is a simple listener that pushes load reports to a channel. 104 type testOOBListener struct { 105 cleanup func() 106 loadReportCh chan *v3orcapb.OrcaLoadReport 107 } 108 109 func newTestOOBListener() *testOOBListener { 110 return &testOOBListener{cleanup: func() {}, loadReportCh: make(chan *v3orcapb.OrcaLoadReport)} 111 } 112 113 func (t *testOOBListener) Stop() { t.cleanup() } 114 115 func (t *testOOBListener) OnLoadReport(r *v3orcapb.OrcaLoadReport) { 116 t.loadReportCh <- r 117 } 118 119 // TestProducer is a basic, end-to-end style test of an LB policy with an 120 // OOBListener communicating with a server with an ORCA service. 121 func (s) TestProducer(t *testing.T) { 122 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 123 defer cancel() 124 125 // Use a fixed backoff for stream recreation. 126 oldBackoff := internal.DefaultBackoffFunc 127 internal.DefaultBackoffFunc = func(int) time.Duration { return 10 * time.Millisecond } 128 defer func() { internal.DefaultBackoffFunc = oldBackoff }() 129 130 // Initialize listener for our ORCA server. 131 lis, err := testutils.LocalTCPListener() 132 if err != nil { 133 t.Fatal(err) 134 } 135 136 // Register the OpenRCAService with a very short metrics reporting interval. 137 const shortReportingInterval = 50 * time.Millisecond 138 smr := orca.NewServerMetricsRecorder() 139 opts := orca.ServiceOptions{MinReportingInterval: shortReportingInterval, ServerMetricsProvider: smr} 140 internal.AllowAnyMinReportingInterval.(func(*orca.ServiceOptions))(&opts) 141 s := grpc.NewServer() 142 if err := orca.Register(s, opts); err != nil { 143 t.Fatalf("orca.Register failed: %v", err) 144 } 145 go s.Serve(lis) 146 defer s.Stop() 147 148 // Create our client with an OOB listener in the LB policy it selects. 149 r := manual.NewBuilderWithScheme("whatever") 150 oobLis := newTestOOBListener() 151 152 lisOpts := orca.OOBListenerOptions{ReportInterval: 50 * time.Millisecond} 153 li := &listenerInfo{scChan: make(chan balancer.SubConn, 1), listener: oobLis, opts: lisOpts} 154 addr := setListenerInfo(resolver.Address{Addr: lis.Addr().String()}, li) 155 r.InitialState(resolver.State{Addresses: []resolver.Address{addr}}) 156 dopts := []grpc.DialOption{ 157 grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"customLB":{}}]}`), 158 grpc.WithResolvers(r), 159 grpc.WithTransportCredentials(insecure.NewCredentials()), 160 } 161 cc, err := grpc.NewClient("whatever:///whatever", dopts...) 162 if err != nil { 163 t.Fatalf("grpc.NewClient() failed: %v", err) 164 } 165 defer cc.Close() 166 cc.Connect() 167 // Set a few metrics and wait for them on the client side. 168 smr.SetCPUUtilization(10) 169 smr.SetMemoryUtilization(0.1) 170 smr.SetNamedUtilization("bob", 0.555) 171 loadReportWant := &v3orcapb.OrcaLoadReport{ 172 CpuUtilization: 10, 173 MemUtilization: 0.1, 174 Utilization: map[string]float64{"bob": 0.555}, 175 } 176 177 testReport: 178 for { 179 select { 180 case r := <-oobLis.loadReportCh: 181 t.Log("Load report received: ", r) 182 if proto.Equal(r, loadReportWant) { 183 // Success! 184 break testReport 185 } 186 case <-ctx.Done(): 187 t.Fatalf("timed out waiting for load report: %v", loadReportWant) 188 } 189 } 190 191 // Change and add metrics and wait for them on the client side. 192 smr.SetCPUUtilization(0.5) 193 smr.SetMemoryUtilization(0.2) 194 smr.SetNamedUtilization("mary", 0.321) 195 loadReportWant = &v3orcapb.OrcaLoadReport{ 196 CpuUtilization: 0.5, 197 MemUtilization: 0.2, 198 Utilization: map[string]float64{"bob": 0.555, "mary": 0.321}, 199 } 200 201 for { 202 select { 203 case r := <-oobLis.loadReportCh: 204 t.Log("Load report received: ", r) 205 if proto.Equal(r, loadReportWant) { 206 // Success! 207 return 208 } 209 case <-ctx.Done(): 210 t.Fatalf("timed out waiting for load report: %v", loadReportWant) 211 } 212 } 213 214 } 215 216 // fakeORCAService is a simple implementation of an ORCA service that pushes 217 // requests it receives from clients to a channel and sends responses from a 218 // channel back. This allows tests to verify the client is sending requests 219 // and processing responses properly. 220 type fakeORCAService struct { 221 v3orcaservicegrpc.UnimplementedOpenRcaServiceServer 222 223 reqCh chan *v3orcaservicepb.OrcaLoadReportRequest 224 respCh chan any // either *v3orcapb.OrcaLoadReport or error 225 } 226 227 func newFakeORCAService() *fakeORCAService { 228 return &fakeORCAService{ 229 reqCh: make(chan *v3orcaservicepb.OrcaLoadReportRequest), 230 respCh: make(chan any), 231 } 232 } 233 234 func (f *fakeORCAService) close() { 235 close(f.respCh) 236 } 237 238 func (f *fakeORCAService) StreamCoreMetrics(req *v3orcaservicepb.OrcaLoadReportRequest, stream v3orcaservicegrpc.OpenRcaService_StreamCoreMetricsServer) error { 239 f.reqCh <- req 240 for { 241 var resp any 242 select { 243 case resp = <-f.respCh: 244 case <-stream.Context().Done(): 245 return stream.Context().Err() 246 } 247 248 if err, ok := resp.(error); ok { 249 return err 250 } 251 if err := stream.Send(resp.(*v3orcapb.OrcaLoadReport)); err != nil { 252 // In the event that a stream error occurs, a new stream will have 253 // been created that was waiting for this response message. Push 254 // it back onto the channel and return. 255 // 256 // This happens because we range over respCh. If we changed to 257 // instead select on respCh + stream.Context(), the same situation 258 // could still occur due to a race between noticing the two events, 259 // so such a workaround would still be needed to prevent flakiness. 260 f.respCh <- resp 261 return err 262 } 263 } 264 } 265 266 // TestProducerBackoff verifies that the ORCA producer applies the proper 267 // backoff after stream failures. 268 func (s) TestProducerBackoff(t *testing.T) { 269 grpctest.TLogger.ExpectErrorN("injected error", 4) 270 271 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 272 defer cancel() 273 274 // Provide a convenient way to expect backoff calls and return a minimal 275 // value. 276 const backoffShouldNotBeCalled = 9999 // Use to assert backoff function is not called. 277 const backoffAllowAny = -1 // Use to ignore any backoff calls. 278 expectedBackoff := backoffAllowAny 279 oldBackoff := internal.DefaultBackoffFunc 280 internal.DefaultBackoffFunc = func(got int) time.Duration { 281 if expectedBackoff == backoffShouldNotBeCalled { 282 t.Errorf("Unexpected backoff call; parameter = %v", got) 283 } else if expectedBackoff != backoffAllowAny { 284 if got != expectedBackoff { 285 t.Errorf("Unexpected backoff received; got %v want %v", got, expectedBackoff) 286 } 287 } 288 return time.Millisecond 289 } 290 defer func() { internal.DefaultBackoffFunc = oldBackoff }() 291 292 // Initialize listener for our ORCA server. 293 lis, err := testutils.LocalTCPListener() 294 if err != nil { 295 t.Fatal(err) 296 } 297 298 // Register our fake ORCA service. 299 s := grpc.NewServer() 300 fake := newFakeORCAService() 301 defer fake.close() 302 v3orcaservicegrpc.RegisterOpenRcaServiceServer(s, fake) 303 go s.Serve(lis) 304 defer s.Stop() 305 306 // Define the report interval and a function to wait for it to be sent to 307 // the server. 308 const reportInterval = 123 * time.Second 309 awaitRequest := func(interval time.Duration) { 310 select { 311 case req := <-fake.reqCh: 312 if got := req.GetReportInterval().AsDuration(); got != interval { 313 t.Errorf("Unexpected report interval; got %v want %v", got, interval) 314 } 315 case <-ctx.Done(): 316 t.Fatalf("Did not receive client request") 317 } 318 } 319 320 // Create our client with an OOB listener in the LB policy it selects. 321 r := manual.NewBuilderWithScheme("whatever") 322 oobLis := newTestOOBListener() 323 324 lisOpts := orca.OOBListenerOptions{ReportInterval: reportInterval} 325 li := &listenerInfo{scChan: make(chan balancer.SubConn, 1), listener: oobLis, opts: lisOpts} 326 r.InitialState(resolver.State{Addresses: []resolver.Address{setListenerInfo(resolver.Address{Addr: lis.Addr().String()}, li)}}) 327 dopts := []grpc.DialOption{ 328 grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"customLB":{}}]}`), 329 grpc.WithResolvers(r), 330 grpc.WithTransportCredentials(insecure.NewCredentials()), 331 } 332 cc, err := grpc.NewClient("whatever:///whatever", dopts...) 333 if err != nil { 334 t.Fatalf("grpc.NewClient failed: %v", err) 335 } 336 cc.Connect() 337 defer cc.Close() 338 339 // Define a load report to send and expect the client to see. 340 loadReportWant := &v3orcapb.OrcaLoadReport{ 341 CpuUtilization: 10, 342 MemUtilization: 0.1, 343 Utilization: map[string]float64{"bob": 0.555}, 344 } 345 346 // Unblock the fake. 347 awaitRequest(reportInterval) 348 fake.respCh <- loadReportWant 349 select { 350 case r := <-oobLis.loadReportCh: 351 t.Log("Load report received: ", r) 352 if proto.Equal(r, loadReportWant) { 353 // Success! 354 break 355 } 356 case <-ctx.Done(): 357 t.Fatalf("timed out waiting for load report: %v", loadReportWant) 358 } 359 360 // The next request should be immediate, since there was a message 361 // received. 362 expectedBackoff = backoffShouldNotBeCalled 363 fake.respCh <- status.Errorf(codes.Internal, "injected error") 364 awaitRequest(reportInterval) 365 366 // The next requests will need to backoff. 367 expectedBackoff = 0 368 fake.respCh <- status.Errorf(codes.Internal, "injected error") 369 awaitRequest(reportInterval) 370 expectedBackoff = 1 371 fake.respCh <- status.Errorf(codes.Internal, "injected error") 372 awaitRequest(reportInterval) 373 expectedBackoff = 2 374 fake.respCh <- status.Errorf(codes.Internal, "injected error") 375 awaitRequest(reportInterval) 376 // The next request should be immediate, since there was a message 377 // received. 378 expectedBackoff = backoffShouldNotBeCalled 379 380 // Send another valid response and wait for it on the client. 381 fake.respCh <- loadReportWant 382 select { 383 case r := <-oobLis.loadReportCh: 384 t.Log("Load report received: ", r) 385 if proto.Equal(r, loadReportWant) { 386 // Success! 387 break 388 } 389 case <-ctx.Done(): 390 t.Fatalf("timed out waiting for load report: %v", loadReportWant) 391 } 392 } 393 394 // TestProducerMultipleListeners tests that multiple listeners works as 395 // expected in a producer: requesting the proper interval and delivering the 396 // update to all listeners. 397 func (s) TestProducerMultipleListeners(t *testing.T) { 398 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 399 defer cancel() 400 401 // Provide a convenient way to expect backoff calls and return a minimal 402 // value. 403 oldBackoff := internal.DefaultBackoffFunc 404 internal.DefaultBackoffFunc = func(got int) time.Duration { 405 return time.Millisecond 406 } 407 defer func() { internal.DefaultBackoffFunc = oldBackoff }() 408 409 // Initialize listener for our ORCA server. 410 lis, err := testutils.LocalTCPListener() 411 if err != nil { 412 t.Fatal(err) 413 } 414 415 // Register our fake ORCA service. 416 s := grpc.NewServer() 417 fake := newFakeORCAService() 418 defer fake.close() 419 v3orcaservicegrpc.RegisterOpenRcaServiceServer(s, fake) 420 go s.Serve(lis) 421 defer s.Stop() 422 423 // Define the report interval and a function to wait for it to be sent to 424 // the server. 425 const reportInterval1 = 123 * time.Second 426 const reportInterval2 = 234 * time.Second 427 const reportInterval3 = 56 * time.Second 428 awaitRequest := func(interval time.Duration) { 429 select { 430 case req := <-fake.reqCh: 431 if got := req.GetReportInterval().AsDuration(); got != interval { 432 t.Errorf("Unexpected report interval; got %v want %v", got, interval) 433 } 434 case <-ctx.Done(): 435 t.Fatalf("Did not receive client request") 436 } 437 } 438 439 // Create our client with an OOB listener in the LB policy it selects. 440 r := manual.NewBuilderWithScheme("whatever") 441 oobLis1 := newTestOOBListener() 442 lisOpts1 := orca.OOBListenerOptions{ReportInterval: reportInterval1} 443 li := &listenerInfo{scChan: make(chan balancer.SubConn, 1), listener: oobLis1, opts: lisOpts1} 444 r.InitialState(resolver.State{Addresses: []resolver.Address{setListenerInfo(resolver.Address{Addr: lis.Addr().String()}, li)}}) 445 cc, err := grpc.NewClient("whatever:///whatever", grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"customLB":{}}]}`), grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials())) 446 if err != nil { 447 t.Fatalf("grpc.NewClient() failed: %v", err) 448 } 449 cc.Connect() 450 defer cc.Close() 451 452 // Ensure the OOB listener is stopped before the client is closed to avoid 453 // a potential irrelevant error in the logs. 454 defer oobLis1.Stop() 455 456 oobLis2 := newTestOOBListener() 457 lisOpts2 := orca.OOBListenerOptions{ReportInterval: reportInterval2} 458 459 oobLis3 := newTestOOBListener() 460 lisOpts3 := orca.OOBListenerOptions{ReportInterval: reportInterval3} 461 462 // Define a load report to send and expect the client to see. 463 loadReportWant := &v3orcapb.OrcaLoadReport{ 464 CpuUtilization: 10, 465 MemUtilization: 0.1, 466 Utilization: map[string]float64{"bob": 0.555}, 467 } 468 469 // Receive reports and update counts for the three listeners. 470 var reportsMu sync.Mutex 471 var reportsReceived1, reportsReceived2, reportsReceived3 int 472 go func() { 473 for { 474 select { 475 case r := <-oobLis1.loadReportCh: 476 t.Log("Load report 1 received: ", r) 477 if !proto.Equal(r, loadReportWant) { 478 t.Errorf("Unexpected report received: %+v", r) 479 } 480 reportsMu.Lock() 481 reportsReceived1++ 482 reportsMu.Unlock() 483 case r := <-oobLis2.loadReportCh: 484 t.Log("Load report 2 received: ", r) 485 if !proto.Equal(r, loadReportWant) { 486 t.Errorf("Unexpected report received: %+v", r) 487 } 488 reportsMu.Lock() 489 reportsReceived2++ 490 reportsMu.Unlock() 491 case r := <-oobLis3.loadReportCh: 492 t.Log("Load report 3 received: ", r) 493 if !proto.Equal(r, loadReportWant) { 494 t.Errorf("Unexpected report received: %+v", r) 495 } 496 reportsMu.Lock() 497 reportsReceived3++ 498 reportsMu.Unlock() 499 case <-ctx.Done(): 500 // Test has ended; exit 501 return 502 } 503 } 504 }() 505 506 // checkReports is a helper function to check the report counts for the three listeners. 507 checkReports := func(r1, r2, r3 int) { 508 t.Helper() 509 for ctx.Err() == nil { 510 reportsMu.Lock() 511 if r1 == reportsReceived1 && r2 == reportsReceived2 && r3 == reportsReceived3 { 512 // Success! 513 reportsMu.Unlock() 514 return 515 } 516 if reportsReceived1 > r1 || reportsReceived2 > r2 || reportsReceived3 > r3 { 517 reportsMu.Unlock() 518 t.Fatalf("received excess reports. got %v %v %v; want %v %v %v", reportsReceived1, reportsReceived2, reportsReceived3, r1, r2, r3) 519 return 520 } 521 reportsMu.Unlock() 522 time.Sleep(10 * time.Millisecond) 523 } 524 t.Fatalf("timed out waiting for reports received. got %v %v %v; want %v %v %v", reportsReceived1, reportsReceived2, reportsReceived3, r1, r2, r3) 525 } 526 527 // Only 1 listener; expect reportInterval1 to be used and expect the report 528 // to be sent to the listener. 529 awaitRequest(reportInterval1) 530 fake.respCh <- loadReportWant 531 checkReports(1, 0, 0) 532 533 sc := <-li.scChan 534 // Register listener 2 with a less frequent interval; no need to recreate 535 // stream. Report should go to both listeners. 536 oobLis2.cleanup = orca.RegisterOOBListener(sc, oobLis2, lisOpts2) 537 fake.respCh <- loadReportWant 538 checkReports(2, 1, 0) 539 540 // Register listener 3 with a more frequent interval; stream is recreated 541 // with this interval. The next report will go to all three listeners. 542 oobLis3.cleanup = orca.RegisterOOBListener(sc, oobLis3, lisOpts3) 543 awaitRequest(reportInterval3) 544 fake.respCh <- loadReportWant 545 checkReports(3, 2, 1) 546 547 // Another report without a change in listeners should go to all three listeners. 548 fake.respCh <- loadReportWant 549 checkReports(4, 3, 2) 550 551 // Stop listener 2. This does not affect the interval as listener 3 is 552 // still the shortest. The next update goes to listeners 1 and 3. 553 oobLis2.Stop() 554 fake.respCh <- loadReportWant 555 checkReports(5, 3, 3) 556 557 // Stop listener 3. This makes the interval longer. Reports should only 558 // go to listener 1 now. 559 oobLis3.Stop() 560 awaitRequest(reportInterval1) 561 fake.respCh <- loadReportWant 562 checkReports(6, 3, 3) 563 // Another report without a change in listeners should go to the first listener. 564 fake.respCh <- loadReportWant 565 checkReports(7, 3, 3) 566 }