google.golang.org/grpc@v1.62.1/orca/producer_test.go (about) 1 /* 2 * Copyright 2022 gRPC authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package orca_test 18 19 import ( 20 "context" 21 "fmt" 22 "sync" 23 "testing" 24 "time" 25 26 "google.golang.org/grpc" 27 "google.golang.org/grpc/balancer" 28 "google.golang.org/grpc/balancer/roundrobin" 29 "google.golang.org/grpc/codes" 30 "google.golang.org/grpc/credentials/insecure" 31 "google.golang.org/grpc/internal/grpctest" 32 "google.golang.org/grpc/internal/testutils" 33 "google.golang.org/grpc/orca" 34 "google.golang.org/grpc/orca/internal" 35 "google.golang.org/grpc/resolver" 36 "google.golang.org/grpc/resolver/manual" 37 "google.golang.org/grpc/status" 38 "google.golang.org/protobuf/proto" 39 40 v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3" 41 v3orcaservicegrpc "github.com/cncf/xds/go/xds/service/orca/v3" 42 v3orcaservicepb "github.com/cncf/xds/go/xds/service/orca/v3" 43 ) 44 45 // customLBB wraps a round robin LB policy but provides a ClientConn wrapper to 46 // add an ORCA OOB report producer for all created SubConns. 47 type customLBB struct{} 48 49 func (customLBB) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { 50 return balancer.Get(roundrobin.Name).Build(&ccWrapper{ClientConn: cc}, opts) 51 } 52 53 func (customLBB) Name() string { return "customLB" } 54 55 func init() { 56 balancer.Register(customLBB{}) 57 } 58 59 type ccWrapper struct { 60 balancer.ClientConn 61 } 62 63 func (w *ccWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) { 64 if len(addrs) != 1 { 65 panic(fmt.Sprintf("got addrs=%v; want len(addrs) == 1", addrs)) 66 } 67 sc, err := w.ClientConn.NewSubConn(addrs, opts) 68 if err != nil { 69 return sc, err 70 } 71 l := getListenerInfo(addrs[0]) 72 l.listener.cleanup = orca.RegisterOOBListener(sc, l.listener, l.opts) 73 l.sc = sc 74 return sc, nil 75 } 76 77 // listenerInfo is stored in an address's attributes to allow ORCA 78 // listeners to be registered on subconns created for that address. 79 type listenerInfo struct { 80 listener *testOOBListener 81 opts orca.OOBListenerOptions 82 sc balancer.SubConn // Set by the LB policy 83 } 84 85 type listenerInfoKey struct{} 86 87 func setListenerInfo(addr resolver.Address, l *listenerInfo) resolver.Address { 88 addr.Attributes = addr.Attributes.WithValue(listenerInfoKey{}, l) 89 return addr 90 } 91 92 func getListenerInfo(addr resolver.Address) *listenerInfo { 93 return addr.Attributes.Value(listenerInfoKey{}).(*listenerInfo) 94 } 95 96 // testOOBListener is a simple listener that pushes load reports to a channel. 97 type testOOBListener struct { 98 cleanup func() 99 loadReportCh chan *v3orcapb.OrcaLoadReport 100 } 101 102 func newTestOOBListener() *testOOBListener { 103 return &testOOBListener{cleanup: func() {}, loadReportCh: make(chan *v3orcapb.OrcaLoadReport)} 104 } 105 106 func (t *testOOBListener) Stop() { t.cleanup() } 107 108 func (t *testOOBListener) OnLoadReport(r *v3orcapb.OrcaLoadReport) { 109 t.loadReportCh <- r 110 } 111 112 // TestProducer is a basic, end-to-end style test of an LB policy with an 113 // OOBListener communicating with a server with an ORCA service. 114 func (s) TestProducer(t *testing.T) { 115 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 116 defer cancel() 117 118 // Use a fixed backoff for stream recreation. 119 oldBackoff := internal.DefaultBackoffFunc 120 internal.DefaultBackoffFunc = func(int) time.Duration { return 10 * time.Millisecond } 121 defer func() { internal.DefaultBackoffFunc = oldBackoff }() 122 123 // Initialize listener for our ORCA server. 124 lis, err := testutils.LocalTCPListener() 125 if err != nil { 126 t.Fatal(err) 127 } 128 129 // Register the OpenRCAService with a very short metrics reporting interval. 130 const shortReportingInterval = 50 * time.Millisecond 131 smr := orca.NewServerMetricsRecorder() 132 opts := orca.ServiceOptions{MinReportingInterval: shortReportingInterval, ServerMetricsProvider: smr} 133 internal.AllowAnyMinReportingInterval.(func(*orca.ServiceOptions))(&opts) 134 s := grpc.NewServer() 135 if err := orca.Register(s, opts); err != nil { 136 t.Fatalf("orca.Register failed: %v", err) 137 } 138 go s.Serve(lis) 139 defer s.Stop() 140 141 // Create our client with an OOB listener in the LB policy it selects. 142 r := manual.NewBuilderWithScheme("whatever") 143 oobLis := newTestOOBListener() 144 145 lisOpts := orca.OOBListenerOptions{ReportInterval: 50 * time.Millisecond} 146 li := &listenerInfo{listener: oobLis, opts: lisOpts} 147 addr := setListenerInfo(resolver.Address{Addr: lis.Addr().String()}, li) 148 r.InitialState(resolver.State{Addresses: []resolver.Address{addr}}) 149 cc, err := grpc.Dial("whatever:///whatever", grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"customLB":{}}]}`), grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials())) 150 if err != nil { 151 t.Fatalf("grpc.Dial failed: %v", err) 152 } 153 defer cc.Close() 154 155 // Ensure the OOB listener is stopped before the client is closed to avoid 156 // a potential irrelevant error in the logs. 157 defer oobLis.Stop() 158 159 // Set a few metrics and wait for them on the client side. 160 smr.SetCPUUtilization(10) 161 smr.SetMemoryUtilization(0.1) 162 smr.SetNamedUtilization("bob", 0.555) 163 loadReportWant := &v3orcapb.OrcaLoadReport{ 164 CpuUtilization: 10, 165 MemUtilization: 0.1, 166 Utilization: map[string]float64{"bob": 0.555}, 167 } 168 169 testReport: 170 for { 171 select { 172 case r := <-oobLis.loadReportCh: 173 t.Log("Load report received: ", r) 174 if proto.Equal(r, loadReportWant) { 175 // Success! 176 break testReport 177 } 178 case <-ctx.Done(): 179 t.Fatalf("timed out waiting for load report: %v", loadReportWant) 180 } 181 } 182 183 // Change and add metrics and wait for them on the client side. 184 smr.SetCPUUtilization(0.5) 185 smr.SetMemoryUtilization(0.2) 186 smr.SetNamedUtilization("mary", 0.321) 187 loadReportWant = &v3orcapb.OrcaLoadReport{ 188 CpuUtilization: 0.5, 189 MemUtilization: 0.2, 190 Utilization: map[string]float64{"bob": 0.555, "mary": 0.321}, 191 } 192 193 for { 194 select { 195 case r := <-oobLis.loadReportCh: 196 t.Log("Load report received: ", r) 197 if proto.Equal(r, loadReportWant) { 198 // Success! 199 return 200 } 201 case <-ctx.Done(): 202 t.Fatalf("timed out waiting for load report: %v", loadReportWant) 203 } 204 } 205 } 206 207 // fakeORCAService is a simple implementation of an ORCA service that pushes 208 // requests it receives from clients to a channel and sends responses from a 209 // channel back. This allows tests to verify the client is sending requests 210 // and processing responses properly. 211 type fakeORCAService struct { 212 v3orcaservicegrpc.UnimplementedOpenRcaServiceServer 213 214 reqCh chan *v3orcaservicepb.OrcaLoadReportRequest 215 respCh chan any // either *v3orcapb.OrcaLoadReport or error 216 } 217 218 func newFakeORCAService() *fakeORCAService { 219 return &fakeORCAService{ 220 reqCh: make(chan *v3orcaservicepb.OrcaLoadReportRequest), 221 respCh: make(chan any), 222 } 223 } 224 225 func (f *fakeORCAService) close() { 226 close(f.respCh) 227 } 228 229 func (f *fakeORCAService) StreamCoreMetrics(req *v3orcaservicepb.OrcaLoadReportRequest, stream v3orcaservicegrpc.OpenRcaService_StreamCoreMetricsServer) error { 230 f.reqCh <- req 231 for { 232 var resp any 233 select { 234 case resp = <-f.respCh: 235 case <-stream.Context().Done(): 236 return stream.Context().Err() 237 } 238 239 if err, ok := resp.(error); ok { 240 return err 241 } 242 if err := stream.Send(resp.(*v3orcapb.OrcaLoadReport)); err != nil { 243 // In the event that a stream error occurs, a new stream will have 244 // been created that was waiting for this response message. Push 245 // it back onto the channel and return. 246 // 247 // This happens because we range over respCh. If we changed to 248 // instead select on respCh + stream.Context(), the same situation 249 // could still occur due to a race between noticing the two events, 250 // so such a workaround would still be needed to prevent flakiness. 251 f.respCh <- resp 252 return err 253 } 254 } 255 } 256 257 // TestProducerBackoff verifies that the ORCA producer applies the proper 258 // backoff after stream failures. 259 func (s) TestProducerBackoff(t *testing.T) { 260 grpctest.TLogger.ExpectErrorN("injected error", 4) 261 262 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 263 defer cancel() 264 265 // Provide a convenient way to expect backoff calls and return a minimal 266 // value. 267 const backoffShouldNotBeCalled = 9999 // Use to assert backoff function is not called. 268 const backoffAllowAny = -1 // Use to ignore any backoff calls. 269 expectedBackoff := backoffAllowAny 270 oldBackoff := internal.DefaultBackoffFunc 271 internal.DefaultBackoffFunc = func(got int) time.Duration { 272 if expectedBackoff == backoffShouldNotBeCalled { 273 t.Errorf("Unexpected backoff call; parameter = %v", got) 274 } else if expectedBackoff != backoffAllowAny { 275 if got != expectedBackoff { 276 t.Errorf("Unexpected backoff received; got %v want %v", got, expectedBackoff) 277 } 278 } 279 return time.Millisecond 280 } 281 defer func() { internal.DefaultBackoffFunc = oldBackoff }() 282 283 // Initialize listener for our ORCA server. 284 lis, err := testutils.LocalTCPListener() 285 if err != nil { 286 t.Fatal(err) 287 } 288 289 // Register our fake ORCA service. 290 s := grpc.NewServer() 291 fake := newFakeORCAService() 292 defer fake.close() 293 v3orcaservicegrpc.RegisterOpenRcaServiceServer(s, fake) 294 go s.Serve(lis) 295 defer s.Stop() 296 297 // Define the report interval and a function to wait for it to be sent to 298 // the server. 299 const reportInterval = 123 * time.Second 300 awaitRequest := func(interval time.Duration) { 301 select { 302 case req := <-fake.reqCh: 303 if got := req.GetReportInterval().AsDuration(); got != interval { 304 t.Errorf("Unexpected report interval; got %v want %v", got, interval) 305 } 306 case <-ctx.Done(): 307 t.Fatalf("Did not receive client request") 308 } 309 } 310 311 // Create our client with an OOB listener in the LB policy it selects. 312 r := manual.NewBuilderWithScheme("whatever") 313 oobLis := newTestOOBListener() 314 315 lisOpts := orca.OOBListenerOptions{ReportInterval: reportInterval} 316 li := &listenerInfo{listener: oobLis, opts: lisOpts} 317 r.InitialState(resolver.State{Addresses: []resolver.Address{setListenerInfo(resolver.Address{Addr: lis.Addr().String()}, li)}}) 318 cc, err := grpc.Dial("whatever:///whatever", grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"customLB":{}}]}`), grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials())) 319 if err != nil { 320 t.Fatalf("grpc.Dial failed: %v", err) 321 } 322 defer cc.Close() 323 324 // Ensure the OOB listener is stopped before the client is closed to avoid 325 // a potential irrelevant error in the logs. 326 defer oobLis.Stop() 327 328 // Define a load report to send and expect the client to see. 329 loadReportWant := &v3orcapb.OrcaLoadReport{ 330 CpuUtilization: 10, 331 MemUtilization: 0.1, 332 Utilization: map[string]float64{"bob": 0.555}, 333 } 334 335 // Unblock the fake. 336 awaitRequest(reportInterval) 337 fake.respCh <- loadReportWant 338 select { 339 case r := <-oobLis.loadReportCh: 340 t.Log("Load report received: ", r) 341 if proto.Equal(r, loadReportWant) { 342 // Success! 343 break 344 } 345 case <-ctx.Done(): 346 t.Fatalf("timed out waiting for load report: %v", loadReportWant) 347 } 348 349 // The next request should be immediate, since there was a message 350 // received. 351 expectedBackoff = backoffShouldNotBeCalled 352 fake.respCh <- status.Errorf(codes.Internal, "injected error") 353 awaitRequest(reportInterval) 354 355 // The next requests will need to backoff. 356 expectedBackoff = 0 357 fake.respCh <- status.Errorf(codes.Internal, "injected error") 358 awaitRequest(reportInterval) 359 expectedBackoff = 1 360 fake.respCh <- status.Errorf(codes.Internal, "injected error") 361 awaitRequest(reportInterval) 362 expectedBackoff = 2 363 fake.respCh <- status.Errorf(codes.Internal, "injected error") 364 awaitRequest(reportInterval) 365 // The next request should be immediate, since there was a message 366 // received. 367 expectedBackoff = backoffShouldNotBeCalled 368 369 // Send another valid response and wait for it on the client. 370 fake.respCh <- loadReportWant 371 select { 372 case r := <-oobLis.loadReportCh: 373 t.Log("Load report received: ", r) 374 if proto.Equal(r, loadReportWant) { 375 // Success! 376 break 377 } 378 case <-ctx.Done(): 379 t.Fatalf("timed out waiting for load report: %v", loadReportWant) 380 } 381 } 382 383 // TestProducerMultipleListeners tests that multiple listeners works as 384 // expected in a producer: requesting the proper interval and delivering the 385 // update to all listeners. 386 func (s) TestProducerMultipleListeners(t *testing.T) { 387 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 388 defer cancel() 389 390 // Provide a convenient way to expect backoff calls and return a minimal 391 // value. 392 oldBackoff := internal.DefaultBackoffFunc 393 internal.DefaultBackoffFunc = func(got int) time.Duration { 394 return time.Millisecond 395 } 396 defer func() { internal.DefaultBackoffFunc = oldBackoff }() 397 398 // Initialize listener for our ORCA server. 399 lis, err := testutils.LocalTCPListener() 400 if err != nil { 401 t.Fatal(err) 402 } 403 404 // Register our fake ORCA service. 405 s := grpc.NewServer() 406 fake := newFakeORCAService() 407 defer fake.close() 408 v3orcaservicegrpc.RegisterOpenRcaServiceServer(s, fake) 409 go s.Serve(lis) 410 defer s.Stop() 411 412 // Define the report interval and a function to wait for it to be sent to 413 // the server. 414 const reportInterval1 = 123 * time.Second 415 const reportInterval2 = 234 * time.Second 416 const reportInterval3 = 56 * time.Second 417 awaitRequest := func(interval time.Duration) { 418 select { 419 case req := <-fake.reqCh: 420 if got := req.GetReportInterval().AsDuration(); got != interval { 421 t.Errorf("Unexpected report interval; got %v want %v", got, interval) 422 } 423 case <-ctx.Done(): 424 t.Fatalf("Did not receive client request") 425 } 426 } 427 428 // Create our client with an OOB listener in the LB policy it selects. 429 r := manual.NewBuilderWithScheme("whatever") 430 oobLis1 := newTestOOBListener() 431 lisOpts1 := orca.OOBListenerOptions{ReportInterval: reportInterval1} 432 li := &listenerInfo{listener: oobLis1, opts: lisOpts1} 433 r.InitialState(resolver.State{Addresses: []resolver.Address{setListenerInfo(resolver.Address{Addr: lis.Addr().String()}, li)}}) 434 cc, err := grpc.Dial("whatever:///whatever", grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"customLB":{}}]}`), grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials())) 435 if err != nil { 436 t.Fatalf("grpc.Dial failed: %v", err) 437 } 438 defer cc.Close() 439 440 // Ensure the OOB listener is stopped before the client is closed to avoid 441 // a potential irrelevant error in the logs. 442 defer oobLis1.Stop() 443 444 oobLis2 := newTestOOBListener() 445 lisOpts2 := orca.OOBListenerOptions{ReportInterval: reportInterval2} 446 447 oobLis3 := newTestOOBListener() 448 lisOpts3 := orca.OOBListenerOptions{ReportInterval: reportInterval3} 449 450 // Define a load report to send and expect the client to see. 451 loadReportWant := &v3orcapb.OrcaLoadReport{ 452 CpuUtilization: 10, 453 MemUtilization: 0.1, 454 Utilization: map[string]float64{"bob": 0.555}, 455 } 456 457 // Receive reports and update counts for the three listeners. 458 var reportsMu sync.Mutex 459 var reportsReceived1, reportsReceived2, reportsReceived3 int 460 go func() { 461 for { 462 select { 463 case r := <-oobLis1.loadReportCh: 464 t.Log("Load report 1 received: ", r) 465 if !proto.Equal(r, loadReportWant) { 466 t.Errorf("Unexpected report received: %+v", r) 467 } 468 reportsMu.Lock() 469 reportsReceived1++ 470 reportsMu.Unlock() 471 case r := <-oobLis2.loadReportCh: 472 t.Log("Load report 2 received: ", r) 473 if !proto.Equal(r, loadReportWant) { 474 t.Errorf("Unexpected report received: %+v", r) 475 } 476 reportsMu.Lock() 477 reportsReceived2++ 478 reportsMu.Unlock() 479 case r := <-oobLis3.loadReportCh: 480 t.Log("Load report 3 received: ", r) 481 if !proto.Equal(r, loadReportWant) { 482 t.Errorf("Unexpected report received: %+v", r) 483 } 484 reportsMu.Lock() 485 reportsReceived3++ 486 reportsMu.Unlock() 487 case <-ctx.Done(): 488 // Test has ended; exit 489 return 490 } 491 } 492 }() 493 494 // checkReports is a helper function to check the report counts for the three listeners. 495 checkReports := func(r1, r2, r3 int) { 496 t.Helper() 497 for ctx.Err() == nil { 498 reportsMu.Lock() 499 if r1 == reportsReceived1 && r2 == reportsReceived2 && r3 == reportsReceived3 { 500 // Success! 501 reportsMu.Unlock() 502 return 503 } 504 if reportsReceived1 > r1 || reportsReceived2 > r2 || reportsReceived3 > r3 { 505 reportsMu.Unlock() 506 t.Fatalf("received excess reports. got %v %v %v; want %v %v %v", reportsReceived1, reportsReceived2, reportsReceived3, r1, r2, r3) 507 return 508 } 509 reportsMu.Unlock() 510 time.Sleep(10 * time.Millisecond) 511 } 512 t.Fatalf("timed out waiting for reports received. got %v %v %v; want %v %v %v", reportsReceived1, reportsReceived2, reportsReceived3, r1, r2, r3) 513 } 514 515 // Only 1 listener; expect reportInterval1 to be used and expect the report 516 // to be sent to the listener. 517 awaitRequest(reportInterval1) 518 fake.respCh <- loadReportWant 519 checkReports(1, 0, 0) 520 521 // Register listener 2 with a less frequent interval; no need to recreate 522 // stream. Report should go to both listeners. 523 oobLis2.cleanup = orca.RegisterOOBListener(li.sc, oobLis2, lisOpts2) 524 fake.respCh <- loadReportWant 525 checkReports(2, 1, 0) 526 527 // Register listener 3 with a more frequent interval; stream is recreated 528 // with this interval. The next report will go to all three listeners. 529 oobLis3.cleanup = orca.RegisterOOBListener(li.sc, oobLis3, lisOpts3) 530 awaitRequest(reportInterval3) 531 fake.respCh <- loadReportWant 532 checkReports(3, 2, 1) 533 534 // Another report without a change in listeners should go to all three listeners. 535 fake.respCh <- loadReportWant 536 checkReports(4, 3, 2) 537 538 // Stop listener 2. This does not affect the interval as listener 3 is 539 // still the shortest. The next update goes to listeners 1 and 3. 540 oobLis2.Stop() 541 fake.respCh <- loadReportWant 542 checkReports(5, 3, 3) 543 544 // Stop listener 3. This makes the interval longer. Reports should only 545 // go to listener 1 now. 546 oobLis3.Stop() 547 awaitRequest(reportInterval1) 548 fake.respCh <- loadReportWant 549 checkReports(6, 3, 3) 550 // Another report without a change in listeners should go to the first listener. 551 fake.respCh <- loadReportWant 552 checkReports(7, 3, 3) 553 }