google.golang.org/grpc@v1.72.2/balancer/weightedroundrobin/balancer_test.go (about) 1 /* 2 * 3 * Copyright 2023 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 package weightedroundrobin_test 20 21 import ( 22 "context" 23 "encoding/json" 24 "fmt" 25 "sync" 26 "sync/atomic" 27 "testing" 28 "time" 29 30 "google.golang.org/grpc" 31 "google.golang.org/grpc/internal" 32 "google.golang.org/grpc/internal/grpctest" 33 "google.golang.org/grpc/internal/stubserver" 34 "google.golang.org/grpc/internal/testutils/roundrobin" 35 "google.golang.org/grpc/internal/testutils/stats" 36 "google.golang.org/grpc/orca" 37 "google.golang.org/grpc/peer" 38 "google.golang.org/grpc/resolver" 39 40 wrr "google.golang.org/grpc/balancer/weightedroundrobin" 41 iwrr "google.golang.org/grpc/balancer/weightedroundrobin/internal" 42 43 testgrpc "google.golang.org/grpc/interop/grpc_testing" 44 testpb "google.golang.org/grpc/interop/grpc_testing" 45 ) 46 47 type s struct { 48 grpctest.Tester 49 } 50 51 func Test(t *testing.T) { 52 grpctest.RunSubTests(t, s{}) 53 } 54 55 const defaultTestTimeout = 10 * time.Second 56 const weightUpdatePeriod = 50 * time.Millisecond 57 const weightExpirationPeriod = time.Minute 58 const oobReportingInterval = 10 * time.Millisecond 59 60 func init() { 61 iwrr.AllowAnyWeightUpdatePeriod = true 62 } 63 64 func boolp(b bool) *bool { return &b } 65 func float64p(f float64) *float64 { return &f } 66 func stringp(s string) *string { return &s } 67 68 var ( 69 perCallConfig = iwrr.LBConfig{ 70 EnableOOBLoadReport: boolp(false), 71 OOBReportingPeriod: stringp("0.005s"), 72 BlackoutPeriod: stringp("0s"), 73 WeightExpirationPeriod: stringp("60s"), 74 WeightUpdatePeriod: stringp(".050s"), 75 ErrorUtilizationPenalty: float64p(0), 76 } 77 oobConfig = iwrr.LBConfig{ 78 EnableOOBLoadReport: boolp(true), 79 OOBReportingPeriod: stringp("0.005s"), 80 BlackoutPeriod: stringp("0s"), 81 WeightExpirationPeriod: stringp("60s"), 82 WeightUpdatePeriod: stringp(".050s"), 83 ErrorUtilizationPenalty: float64p(0), 84 } 85 testMetricsConfig = iwrr.LBConfig{ 86 EnableOOBLoadReport: boolp(false), 87 OOBReportingPeriod: stringp("0.005s"), 88 BlackoutPeriod: stringp("0s"), 89 WeightExpirationPeriod: stringp("60s"), 90 WeightUpdatePeriod: stringp("30s"), 91 ErrorUtilizationPenalty: float64p(0), 92 } 93 ) 94 95 type testServer struct { 96 *stubserver.StubServer 97 98 oobMetrics orca.ServerMetricsRecorder // Attached to the OOB stream. 99 callMetrics orca.CallMetricsRecorder // Attached to per-call metrics. 100 } 101 102 type reportType int 103 104 const ( 105 reportNone reportType = iota 106 reportOOB 107 reportCall 108 reportBoth 109 ) 110 111 func startServer(t *testing.T, r reportType) *testServer { 112 t.Helper() 113 114 smr := orca.NewServerMetricsRecorder() 115 cmr := orca.NewServerMetricsRecorder().(orca.CallMetricsRecorder) 116 117 ss := &stubserver.StubServer{ 118 EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) { 119 if r := orca.CallMetricsRecorderFromContext(ctx); r != nil { 120 // Copy metrics from what the test set in cmr into r. 121 sm := cmr.(orca.ServerMetricsProvider).ServerMetrics() 122 r.SetApplicationUtilization(sm.AppUtilization) 123 r.SetQPS(sm.QPS) 124 r.SetEPS(sm.EPS) 125 } 126 return &testpb.Empty{}, nil 127 }, 128 } 129 130 var sopts []grpc.ServerOption 131 if r == reportCall || r == reportBoth { 132 sopts = append(sopts, orca.CallMetricsServerOption(nil)) 133 } 134 135 if r == reportOOB || r == reportBoth { 136 oso := orca.ServiceOptions{ 137 ServerMetricsProvider: smr, 138 MinReportingInterval: 10 * time.Millisecond, 139 } 140 internal.ORCAAllowAnyMinReportingInterval.(func(so *orca.ServiceOptions))(&oso) 141 sopts = append(sopts, stubserver.RegisterServiceServerOption(func(s grpc.ServiceRegistrar) { 142 if err := orca.Register(s, oso); err != nil { 143 t.Fatalf("Failed to register orca service: %v", err) 144 } 145 })) 146 } 147 148 if err := ss.StartServer(sopts...); err != nil { 149 t.Fatalf("Error starting server: %v", err) 150 } 151 t.Cleanup(ss.Stop) 152 153 return &testServer{ 154 StubServer: ss, 155 oobMetrics: smr, 156 callMetrics: cmr, 157 } 158 } 159 160 func svcConfig(t *testing.T, wrrCfg iwrr.LBConfig) string { 161 t.Helper() 162 m, err := json.Marshal(wrrCfg) 163 if err != nil { 164 t.Fatalf("Error marshaling JSON %v: %v", wrrCfg, err) 165 } 166 sc := fmt.Sprintf(`{"loadBalancingConfig": [ {%q:%v} ] }`, wrr.Name, string(m)) 167 t.Logf("Marshaled service config: %v", sc) 168 return sc 169 } 170 171 // Tests basic functionality with one address. With only one address, load 172 // reporting doesn't affect routing at all. 173 func (s) TestBalancer_OneAddress(t *testing.T) { 174 testCases := []struct { 175 rt reportType 176 cfg iwrr.LBConfig 177 }{ 178 {rt: reportNone, cfg: perCallConfig}, 179 {rt: reportCall, cfg: perCallConfig}, 180 {rt: reportOOB, cfg: oobConfig}, 181 } 182 183 for _, tc := range testCases { 184 t.Run(fmt.Sprintf("reportType:%v", tc.rt), func(t *testing.T) { 185 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 186 defer cancel() 187 188 srv := startServer(t, tc.rt) 189 190 sc := svcConfig(t, tc.cfg) 191 if err := srv.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { 192 t.Fatalf("Error starting client: %v", err) 193 } 194 195 // Perform many RPCs to ensure the LB policy works with 1 address. 196 for i := 0; i < 100; i++ { 197 srv.callMetrics.SetQPS(float64(i)) 198 srv.oobMetrics.SetQPS(float64(i)) 199 if _, err := srv.Client.EmptyCall(ctx, &testpb.Empty{}); err != nil { 200 t.Fatalf("Error from EmptyCall: %v", err) 201 } 202 time.Sleep(time.Millisecond) // Delay; test will run 100ms and should perform ~10 weight updates 203 } 204 }) 205 } 206 } 207 208 // TestWRRMetricsBasic tests metrics emitted from the WRR balancer. It 209 // configures a weighted round robin balancer as the top level balancer of a 210 // ClientConn, and configures a fake stats handler on the ClientConn to receive 211 // metrics. It verifies stats emitted from the Weighted Round Robin Balancer on 212 // balancer startup case which triggers the first picker and scheduler update 213 // before any load reports are received. 214 // 215 // Note that this test and others, metrics emission assertions are a snapshot 216 // of the most recently emitted metrics. This is due to the nondeterminism of 217 // scheduler updates with respect to test bodies, so the assertions made are 218 // from the most recently synced state of the system (picker/scheduler) from the 219 // test body. 220 func (s) TestWRRMetricsBasic(t *testing.T) { 221 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 222 defer cancel() 223 224 srv := startServer(t, reportCall) 225 sc := svcConfig(t, testMetricsConfig) 226 227 tmr := stats.NewTestMetricsRecorder() 228 if err := srv.StartClient(grpc.WithDefaultServiceConfig(sc), grpc.WithStatsHandler(tmr)); err != nil { 229 t.Fatalf("Error starting client: %v", err) 230 } 231 srv.callMetrics.SetQPS(float64(1)) 232 233 if _, err := srv.Client.EmptyCall(ctx, &testpb.Empty{}); err != nil { 234 t.Fatalf("Error from EmptyCall: %v", err) 235 } 236 237 if got, _ := tmr.Metric("grpc.lb.wrr.rr_fallback"); got != 1 { 238 t.Fatalf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.wrr.rr_fallback", got, 1) 239 } 240 if got, _ := tmr.Metric("grpc.lb.wrr.endpoint_weight_stale"); got != 0 { 241 t.Fatalf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.wrr.endpoint_weight_stale", got, 0) 242 } 243 if got, _ := tmr.Metric("grpc.lb.wrr.endpoint_weight_not_yet_usable"); got != 1 { 244 t.Fatalf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.wrr.endpoint_weight_not_yet_usable", got, 1) 245 } 246 // Unusable, so no endpoint weight. Due to only one SubConn, this will never 247 // update the weight. Thus, this will stay 0. 248 if got, _ := tmr.Metric("grpc.lb.wrr.endpoint_weight_stale"); got != 0 { 249 t.Fatalf("Unexpected data for metric %v, got: %v, want: %v", "grpc.lb.wrr.endpoint_weight_stale", got, 0) 250 } 251 } 252 253 // Tests two addresses with ORCA reporting disabled (should fall back to pure 254 // RR). 255 func (s) TestBalancer_TwoAddresses_ReportingDisabled(t *testing.T) { 256 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 257 defer cancel() 258 259 srv1 := startServer(t, reportNone) 260 srv2 := startServer(t, reportNone) 261 262 sc := svcConfig(t, perCallConfig) 263 if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { 264 t.Fatalf("Error starting client: %v", err) 265 } 266 addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} 267 srv1.R.UpdateState(resolver.State{Addresses: addrs}) 268 269 // Perform many RPCs to ensure the LB policy works with 2 addresses. 270 for i := 0; i < 20; i++ { 271 roundrobin.CheckRoundRobinRPCs(ctx, srv1.Client, addrs) 272 } 273 } 274 275 // Tests two addresses with per-call ORCA reporting enabled. Checks the 276 // backends are called in the appropriate ratios. 277 func (s) TestBalancer_TwoAddresses_ReportingEnabledPerCall(t *testing.T) { 278 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 279 defer cancel() 280 281 srv1 := startServer(t, reportCall) 282 srv2 := startServer(t, reportCall) 283 284 // srv1 starts loaded and srv2 starts without load; ensure RPCs are routed 285 // disproportionately to srv2 (10:1). 286 srv1.callMetrics.SetQPS(10.0) 287 srv1.callMetrics.SetApplicationUtilization(1.0) 288 289 srv2.callMetrics.SetQPS(10.0) 290 srv2.callMetrics.SetApplicationUtilization(.1) 291 292 sc := svcConfig(t, perCallConfig) 293 if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { 294 t.Fatalf("Error starting client: %v", err) 295 } 296 addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} 297 srv1.R.UpdateState(resolver.State{Addresses: addrs}) 298 299 // Call each backend once to ensure the weights have been received. 300 ensureReached(ctx, t, srv1.Client, 2) 301 302 // Wait for the weight update period to allow the new weights to be processed. 303 time.Sleep(weightUpdatePeriod) 304 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) 305 } 306 307 // Tests two addresses with OOB ORCA reporting enabled. Checks the backends 308 // are called in the appropriate ratios. 309 func (s) TestBalancer_TwoAddresses_ReportingEnabledOOB(t *testing.T) { 310 testCases := []struct { 311 name string 312 utilSetter func(orca.ServerMetricsRecorder, float64) 313 }{{ 314 name: "application_utilization", 315 utilSetter: func(smr orca.ServerMetricsRecorder, val float64) { 316 smr.SetApplicationUtilization(val) 317 }, 318 }, { 319 name: "cpu_utilization", 320 utilSetter: func(smr orca.ServerMetricsRecorder, val float64) { 321 smr.SetCPUUtilization(val) 322 }, 323 }, { 324 name: "application over cpu", 325 utilSetter: func(smr orca.ServerMetricsRecorder, val float64) { 326 smr.SetApplicationUtilization(val) 327 smr.SetCPUUtilization(2.0) // ignored because ApplicationUtilization is set 328 }, 329 }} 330 331 for _, tc := range testCases { 332 t.Run(tc.name, func(t *testing.T) { 333 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 334 defer cancel() 335 336 srv1 := startServer(t, reportOOB) 337 srv2 := startServer(t, reportOOB) 338 339 // srv1 starts loaded and srv2 starts without load; ensure RPCs are routed 340 // disproportionately to srv2 (10:1). 341 srv1.oobMetrics.SetQPS(10.0) 342 tc.utilSetter(srv1.oobMetrics, 1.0) 343 344 srv2.oobMetrics.SetQPS(10.0) 345 tc.utilSetter(srv2.oobMetrics, 0.1) 346 347 sc := svcConfig(t, oobConfig) 348 if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { 349 t.Fatalf("Error starting client: %v", err) 350 } 351 addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} 352 srv1.R.UpdateState(resolver.State{Addresses: addrs}) 353 354 // Call each backend once to ensure the weights have been received. 355 ensureReached(ctx, t, srv1.Client, 2) 356 357 // Wait for the weight update period to allow the new weights to be processed. 358 time.Sleep(weightUpdatePeriod) 359 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) 360 }) 361 } 362 } 363 364 // Tests two addresses with OOB ORCA reporting enabled, where the reports 365 // change over time. Checks the backends are called in the appropriate ratios 366 // before and after modifying the reports. 367 func (s) TestBalancer_TwoAddresses_UpdateLoads(t *testing.T) { 368 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 369 defer cancel() 370 371 srv1 := startServer(t, reportOOB) 372 srv2 := startServer(t, reportOOB) 373 374 // srv1 starts loaded and srv2 starts without load; ensure RPCs are routed 375 // disproportionately to srv2 (10:1). 376 srv1.oobMetrics.SetQPS(10.0) 377 srv1.oobMetrics.SetApplicationUtilization(1.0) 378 379 srv2.oobMetrics.SetQPS(10.0) 380 srv2.oobMetrics.SetApplicationUtilization(.1) 381 382 sc := svcConfig(t, oobConfig) 383 if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { 384 t.Fatalf("Error starting client: %v", err) 385 } 386 addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} 387 srv1.R.UpdateState(resolver.State{Addresses: addrs}) 388 389 // Call each backend once to ensure the weights have been received. 390 ensureReached(ctx, t, srv1.Client, 2) 391 392 // Wait for the weight update period to allow the new weights to be processed. 393 time.Sleep(weightUpdatePeriod) 394 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) 395 396 // Update the loads so srv2 is loaded and srv1 is not; ensure RPCs are 397 // routed disproportionately to srv1. 398 srv1.oobMetrics.SetQPS(10.0) 399 srv1.oobMetrics.SetApplicationUtilization(.1) 400 401 srv2.oobMetrics.SetQPS(10.0) 402 srv2.oobMetrics.SetApplicationUtilization(1.0) 403 404 // Wait for the weight update period to allow the new weights to be processed. 405 time.Sleep(weightUpdatePeriod + oobReportingInterval) 406 checkWeights(ctx, t, srvWeight{srv1, 10}, srvWeight{srv2, 1}) 407 } 408 409 // Tests two addresses with OOB ORCA reporting enabled, then with switching to 410 // per-call reporting. Checks the backends are called in the appropriate 411 // ratios before and after the change. 412 func (s) TestBalancer_TwoAddresses_OOBThenPerCall(t *testing.T) { 413 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 414 defer cancel() 415 416 srv1 := startServer(t, reportBoth) 417 srv2 := startServer(t, reportBoth) 418 419 // srv1 starts loaded and srv2 starts without load; ensure RPCs are routed 420 // disproportionately to srv2 (10:1). 421 srv1.oobMetrics.SetQPS(10.0) 422 srv1.oobMetrics.SetApplicationUtilization(1.0) 423 424 srv2.oobMetrics.SetQPS(10.0) 425 srv2.oobMetrics.SetApplicationUtilization(.1) 426 427 // For per-call metrics (not used initially), srv2 reports that it is 428 // loaded and srv1 reports low load. After confirming OOB works, switch to 429 // per-call and confirm the new routing weights are applied. 430 srv1.callMetrics.SetQPS(10.0) 431 srv1.callMetrics.SetApplicationUtilization(.1) 432 433 srv2.callMetrics.SetQPS(10.0) 434 srv2.callMetrics.SetApplicationUtilization(1.0) 435 436 sc := svcConfig(t, oobConfig) 437 if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { 438 t.Fatalf("Error starting client: %v", err) 439 } 440 addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} 441 srv1.R.UpdateState(resolver.State{Addresses: addrs}) 442 443 // Call each backend once to ensure the weights have been received. 444 ensureReached(ctx, t, srv1.Client, 2) 445 446 // Wait for the weight update period to allow the new weights to be processed. 447 time.Sleep(weightUpdatePeriod) 448 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) 449 450 // Update to per-call weights. 451 c := svcConfig(t, perCallConfig) 452 parsedCfg := srv1.R.CC().ParseServiceConfig(c) 453 if parsedCfg.Err != nil { 454 panic(fmt.Sprintf("Error parsing config %q: %v", c, parsedCfg.Err)) 455 } 456 srv1.R.UpdateState(resolver.State{Addresses: addrs, ServiceConfig: parsedCfg}) 457 458 // Wait for the weight update period to allow the new weights to be processed. 459 time.Sleep(weightUpdatePeriod) 460 checkWeights(ctx, t, srvWeight{srv1, 10}, srvWeight{srv2, 1}) 461 } 462 463 // TestEndpoints_SharedAddress tests the case where two endpoints have the same 464 // address. The expected behavior is undefined, however the program should not 465 // crash. 466 func (s) TestEndpoints_SharedAddress(t *testing.T) { 467 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 468 defer cancel() 469 470 srv := startServer(t, reportCall) 471 sc := svcConfig(t, perCallConfig) 472 if err := srv.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { 473 t.Fatalf("Error starting client: %v", err) 474 } 475 476 endpointsSharedAddress := []resolver.Endpoint{{Addresses: []resolver.Address{{Addr: srv.Address}}}, {Addresses: []resolver.Address{{Addr: srv.Address}}}} 477 srv.R.UpdateState(resolver.State{Endpoints: endpointsSharedAddress}) 478 479 // Make some RPC's and make sure doesn't crash. It should go to one of the 480 // endpoints addresses, it's undefined which one it will choose and the load 481 // reporting might not work, but it should be able to make an RPC. 482 for i := 0; i < 10; i++ { 483 if _, err := srv.Client.EmptyCall(ctx, &testpb.Empty{}); err != nil { 484 t.Fatalf("EmptyCall failed with err: %v", err) 485 } 486 } 487 } 488 489 // TestEndpoints_MultipleAddresses tests WRR on endpoints with numerous 490 // addresses. It configures WRR with two endpoints with one bad address followed 491 // by a good address. It configures two backends that each report per call 492 // metrics, each corresponding to the two endpoints good address. It then 493 // asserts load is distributed as expected corresponding to the call metrics 494 // received. 495 func (s) TestEndpoints_MultipleAddresses(t *testing.T) { 496 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 497 defer cancel() 498 srv1 := startServer(t, reportCall) 499 srv2 := startServer(t, reportCall) 500 501 srv1.callMetrics.SetQPS(10.0) 502 srv1.callMetrics.SetApplicationUtilization(.1) 503 504 srv2.callMetrics.SetQPS(10.0) 505 srv2.callMetrics.SetApplicationUtilization(1.0) 506 507 sc := svcConfig(t, perCallConfig) 508 if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { 509 t.Fatalf("Error starting client: %v", err) 510 } 511 512 twoEndpoints := []resolver.Endpoint{{Addresses: []resolver.Address{{Addr: "bad-address-1"}, {Addr: srv1.Address}}}, {Addresses: []resolver.Address{{Addr: "bad-address-2"}, {Addr: srv2.Address}}}} 513 srv1.R.UpdateState(resolver.State{Endpoints: twoEndpoints}) 514 515 // Call each backend once to ensure the weights have been received. 516 ensureReached(ctx, t, srv1.Client, 2) 517 // Wait for the weight update period to allow the new weights to be processed. 518 time.Sleep(weightUpdatePeriod) 519 checkWeights(ctx, t, srvWeight{srv1, 10}, srvWeight{srv2, 1}) 520 } 521 522 // Tests two addresses with OOB ORCA reporting enabled and a non-zero error 523 // penalty applied. 524 func (s) TestBalancer_TwoAddresses_ErrorPenalty(t *testing.T) { 525 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 526 defer cancel() 527 528 srv1 := startServer(t, reportOOB) 529 srv2 := startServer(t, reportOOB) 530 531 // srv1 starts loaded and srv2 starts without load; ensure RPCs are routed 532 // disproportionately to srv2 (10:1). EPS values are set (but ignored 533 // initially due to ErrorUtilizationPenalty=0). Later EUP will be updated 534 // to 0.9 which will cause the weights to be equal and RPCs to be routed 535 // 50/50. 536 srv1.oobMetrics.SetQPS(10.0) 537 srv1.oobMetrics.SetApplicationUtilization(1.0) 538 srv1.oobMetrics.SetEPS(0) 539 // srv1 weight before: 10.0 / 1.0 = 10.0 540 // srv1 weight after: 10.0 / 1.0 = 10.0 541 542 srv2.oobMetrics.SetQPS(10.0) 543 srv2.oobMetrics.SetApplicationUtilization(.1) 544 srv2.oobMetrics.SetEPS(10.0) 545 // srv2 weight before: 10.0 / 0.1 = 100.0 546 // srv2 weight after: 10.0 / 1.0 = 10.0 547 548 sc := svcConfig(t, oobConfig) 549 if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { 550 t.Fatalf("Error starting client: %v", err) 551 } 552 addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} 553 srv1.R.UpdateState(resolver.State{Addresses: addrs}) 554 555 // Call each backend once to ensure the weights have been received. 556 ensureReached(ctx, t, srv1.Client, 2) 557 558 // Wait for the weight update period to allow the new weights to be processed. 559 time.Sleep(weightUpdatePeriod) 560 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) 561 562 // Update to include an error penalty in the weights. 563 newCfg := oobConfig 564 newCfg.ErrorUtilizationPenalty = float64p(0.9) 565 c := svcConfig(t, newCfg) 566 parsedCfg := srv1.R.CC().ParseServiceConfig(c) 567 if parsedCfg.Err != nil { 568 panic(fmt.Sprintf("Error parsing config %q: %v", c, parsedCfg.Err)) 569 } 570 srv1.R.UpdateState(resolver.State{Addresses: addrs, ServiceConfig: parsedCfg}) 571 572 // Wait for the weight update period to allow the new weights to be processed. 573 time.Sleep(weightUpdatePeriod + oobReportingInterval) 574 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 1}) 575 } 576 577 // Tests that the blackout period causes backends to use 0 as their weight 578 // (meaning to use the average weight) until the blackout period elapses. 579 func (s) TestBalancer_TwoAddresses_BlackoutPeriod(t *testing.T) { 580 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 581 defer cancel() 582 583 var mu sync.Mutex 584 start := time.Now() 585 now := start 586 setNow := func(t time.Time) { 587 mu.Lock() 588 defer mu.Unlock() 589 now = t 590 } 591 592 setTimeNow(func() time.Time { 593 mu.Lock() 594 defer mu.Unlock() 595 return now 596 }) 597 t.Cleanup(func() { setTimeNow(time.Now) }) 598 599 testCases := []struct { 600 blackoutPeriodCfg *string 601 blackoutPeriod time.Duration 602 }{{ 603 blackoutPeriodCfg: stringp("1s"), 604 blackoutPeriod: time.Second, 605 }, { 606 blackoutPeriodCfg: nil, 607 blackoutPeriod: 10 * time.Second, // the default 608 }} 609 for _, tc := range testCases { 610 setNow(start) 611 srv1 := startServer(t, reportOOB) 612 srv2 := startServer(t, reportOOB) 613 614 // srv1 starts loaded and srv2 starts without load; ensure RPCs are routed 615 // disproportionately to srv2 (10:1). 616 srv1.oobMetrics.SetQPS(10.0) 617 srv1.oobMetrics.SetApplicationUtilization(1.0) 618 619 srv2.oobMetrics.SetQPS(10.0) 620 srv2.oobMetrics.SetApplicationUtilization(.1) 621 622 cfg := oobConfig 623 cfg.BlackoutPeriod = tc.blackoutPeriodCfg 624 sc := svcConfig(t, cfg) 625 if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { 626 t.Fatalf("Error starting client: %v", err) 627 } 628 addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} 629 srv1.R.UpdateState(resolver.State{Addresses: addrs}) 630 631 // Call each backend once to ensure the weights have been received. 632 ensureReached(ctx, t, srv1.Client, 2) 633 634 // Wait for the weight update period to allow the new weights to be processed. 635 time.Sleep(weightUpdatePeriod) 636 // During the blackout period (1s) we should route roughly 50/50. 637 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 1}) 638 639 // Advance time to right before the blackout period ends and the weights 640 // should still be zero. 641 setNow(start.Add(tc.blackoutPeriod - time.Nanosecond)) 642 // Wait for the weight update period to allow the new weights to be processed. 643 time.Sleep(weightUpdatePeriod) 644 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 1}) 645 646 // Advance time to right after the blackout period ends and the weights 647 // should now activate. 648 setNow(start.Add(tc.blackoutPeriod)) 649 // Wait for the weight update period to allow the new weights to be processed. 650 time.Sleep(weightUpdatePeriod) 651 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) 652 } 653 } 654 655 // Tests that the weight expiration period causes backends to use 0 as their 656 // weight (meaning to use the average weight) once the expiration period 657 // elapses. 658 func (s) TestBalancer_TwoAddresses_WeightExpiration(t *testing.T) { 659 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 660 defer cancel() 661 662 var mu sync.Mutex 663 start := time.Now() 664 now := start 665 setNow := func(t time.Time) { 666 mu.Lock() 667 defer mu.Unlock() 668 now = t 669 } 670 setTimeNow(func() time.Time { 671 mu.Lock() 672 defer mu.Unlock() 673 return now 674 }) 675 t.Cleanup(func() { setTimeNow(time.Now) }) 676 677 srv1 := startServer(t, reportBoth) 678 srv2 := startServer(t, reportBoth) 679 680 // srv1 starts loaded and srv2 starts without load; ensure RPCs are routed 681 // disproportionately to srv2 (10:1). Because the OOB reporting interval 682 // is 1 minute but the weights expire in 1 second, routing will go to 50/50 683 // after the weights expire. 684 srv1.oobMetrics.SetQPS(10.0) 685 srv1.oobMetrics.SetApplicationUtilization(1.0) 686 687 srv2.oobMetrics.SetQPS(10.0) 688 srv2.oobMetrics.SetApplicationUtilization(.1) 689 690 cfg := oobConfig 691 cfg.OOBReportingPeriod = stringp("60s") 692 sc := svcConfig(t, cfg) 693 if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { 694 t.Fatalf("Error starting client: %v", err) 695 } 696 addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}} 697 srv1.R.UpdateState(resolver.State{Addresses: addrs}) 698 699 // Call each backend once to ensure the weights have been received. 700 ensureReached(ctx, t, srv1.Client, 2) 701 702 // Wait for the weight update period to allow the new weights to be processed. 703 time.Sleep(weightUpdatePeriod) 704 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) 705 706 // Advance what time.Now returns to the weight expiration time minus 1s to 707 // ensure all weights are still honored. 708 setNow(start.Add(weightExpirationPeriod - time.Second)) 709 710 // Wait for the weight update period to allow the new weights to be processed. 711 time.Sleep(weightUpdatePeriod) 712 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}) 713 714 // Advance what time.Now returns to the weight expiration time plus 1s to 715 // ensure all weights expired and addresses are routed evenly. 716 setNow(start.Add(weightExpirationPeriod + time.Second)) 717 718 // Wait for the weight expiration period so the weights have expired. 719 time.Sleep(weightUpdatePeriod) 720 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 1}) 721 } 722 723 // Tests logic surrounding subchannel management. 724 func (s) TestBalancer_AddressesChanging(t *testing.T) { 725 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 726 defer cancel() 727 728 srv1 := startServer(t, reportBoth) 729 srv2 := startServer(t, reportBoth) 730 srv3 := startServer(t, reportBoth) 731 srv4 := startServer(t, reportBoth) 732 733 // srv1: weight 10 734 srv1.oobMetrics.SetQPS(10.0) 735 srv1.oobMetrics.SetApplicationUtilization(1.0) 736 // srv2: weight 100 737 srv2.oobMetrics.SetQPS(10.0) 738 srv2.oobMetrics.SetApplicationUtilization(.1) 739 // srv3: weight 20 740 srv3.oobMetrics.SetQPS(20.0) 741 srv3.oobMetrics.SetApplicationUtilization(1.0) 742 // srv4: weight 200 743 srv4.oobMetrics.SetQPS(20.0) 744 srv4.oobMetrics.SetApplicationUtilization(.1) 745 746 sc := svcConfig(t, oobConfig) 747 if err := srv1.StartClient(grpc.WithDefaultServiceConfig(sc)); err != nil { 748 t.Fatalf("Error starting client: %v", err) 749 } 750 srv2.Client = srv1.Client 751 addrs := []resolver.Address{{Addr: srv1.Address}, {Addr: srv2.Address}, {Addr: srv3.Address}} 752 srv1.R.UpdateState(resolver.State{Addresses: addrs}) 753 754 // Call each backend once to ensure the weights have been received. 755 ensureReached(ctx, t, srv1.Client, 3) 756 time.Sleep(weightUpdatePeriod) 757 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}, srvWeight{srv3, 2}) 758 759 // Add backend 4 760 addrs = append(addrs, resolver.Address{Addr: srv4.Address}) 761 srv1.R.UpdateState(resolver.State{Addresses: addrs}) 762 time.Sleep(weightUpdatePeriod) 763 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}, srvWeight{srv3, 2}, srvWeight{srv4, 20}) 764 765 // Shutdown backend 3. RPCs will no longer be routed to it. 766 srv3.Stop() 767 time.Sleep(weightUpdatePeriod) 768 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv2, 10}, srvWeight{srv4, 20}) 769 770 // Remove addresses 2 and 3. RPCs will no longer be routed to 2 either. 771 addrs = []resolver.Address{{Addr: srv1.Address}, {Addr: srv4.Address}} 772 srv1.R.UpdateState(resolver.State{Addresses: addrs}) 773 time.Sleep(weightUpdatePeriod) 774 checkWeights(ctx, t, srvWeight{srv1, 1}, srvWeight{srv4, 20}) 775 776 // Re-add 2 and remove the rest. 777 addrs = []resolver.Address{{Addr: srv2.Address}} 778 srv1.R.UpdateState(resolver.State{Addresses: addrs}) 779 time.Sleep(weightUpdatePeriod) 780 checkWeights(ctx, t, srvWeight{srv2, 10}) 781 782 // Re-add 4. 783 addrs = append(addrs, resolver.Address{Addr: srv4.Address}) 784 srv1.R.UpdateState(resolver.State{Addresses: addrs}) 785 time.Sleep(weightUpdatePeriod) 786 checkWeights(ctx, t, srvWeight{srv2, 10}, srvWeight{srv4, 20}) 787 } 788 789 func ensureReached(ctx context.Context, t *testing.T, c testgrpc.TestServiceClient, n int) { 790 t.Helper() 791 reached := make(map[string]struct{}) 792 for len(reached) != n { 793 var peer peer.Peer 794 if _, err := c.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&peer)); err != nil { 795 t.Fatalf("Error from EmptyCall: %v", err) 796 } 797 reached[peer.Addr.String()] = struct{}{} 798 } 799 } 800 801 type srvWeight struct { 802 srv *testServer 803 w int 804 } 805 806 const rrIterations = 100 807 808 // checkWeights does rrIterations RPCs and expects the different backends to be 809 // routed in a ratio as determined by the srvWeights passed in. Allows for 810 // some variance (+/- 2 RPCs per backend). 811 func checkWeights(ctx context.Context, t *testing.T, sws ...srvWeight) { 812 t.Helper() 813 814 c := sws[0].srv.Client 815 816 // Replace the weights with approximate counts of RPCs wanted given the 817 // iterations performed. 818 weightSum := 0 819 for _, sw := range sws { 820 weightSum += sw.w 821 } 822 for i := range sws { 823 sws[i].w = rrIterations * sws[i].w / weightSum 824 } 825 826 for attempts := 0; attempts < 10; attempts++ { 827 serverCounts := make(map[string]int) 828 for i := 0; i < rrIterations; i++ { 829 var peer peer.Peer 830 if _, err := c.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&peer)); err != nil { 831 t.Fatalf("Error from EmptyCall: %v; timed out waiting for weighted RR behavior?", err) 832 } 833 serverCounts[peer.Addr.String()]++ 834 } 835 if len(serverCounts) != len(sws) { 836 continue 837 } 838 success := true 839 for _, sw := range sws { 840 c := serverCounts[sw.srv.Address] 841 if c < sw.w-2 || c > sw.w+2 { 842 success = false 843 break 844 } 845 } 846 if success { 847 t.Logf("Passed iteration %v; counts: %v", attempts, serverCounts) 848 return 849 } 850 t.Logf("Failed iteration %v; counts: %v; want %+v", attempts, serverCounts, sws) 851 time.Sleep(5 * time.Millisecond) 852 } 853 t.Fatalf("Failed to route RPCs with proper ratio") 854 } 855 856 func init() { 857 setTimeNow(time.Now) 858 iwrr.TimeNow = timeNow 859 } 860 861 var timeNowFunc atomic.Value // func() time.Time 862 863 func timeNow() time.Time { 864 return timeNowFunc.Load().(func() time.Time)() 865 } 866 867 func setTimeNow(f func() time.Time) { 868 timeNowFunc.Store(f) 869 }