google.golang.org/grpc@v1.62.1/test/pickfirst_test.go (about) 1 /* 2 * 3 * Copyright 2022 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 package test 20 21 import ( 22 "context" 23 "errors" 24 "fmt" 25 "strings" 26 "testing" 27 "time" 28 29 "google.golang.org/grpc" 30 "google.golang.org/grpc/backoff" 31 "google.golang.org/grpc/codes" 32 "google.golang.org/grpc/connectivity" 33 "google.golang.org/grpc/credentials/insecure" 34 "google.golang.org/grpc/internal" 35 "google.golang.org/grpc/internal/channelz" 36 "google.golang.org/grpc/internal/grpcrand" 37 "google.golang.org/grpc/internal/stubserver" 38 "google.golang.org/grpc/internal/testutils" 39 "google.golang.org/grpc/internal/testutils/pickfirst" 40 "google.golang.org/grpc/resolver" 41 "google.golang.org/grpc/resolver/manual" 42 "google.golang.org/grpc/serviceconfig" 43 "google.golang.org/grpc/status" 44 45 testgrpc "google.golang.org/grpc/interop/grpc_testing" 46 testpb "google.golang.org/grpc/interop/grpc_testing" 47 ) 48 49 const pickFirstServiceConfig = `{"loadBalancingConfig": [{"pick_first":{}}]}` 50 51 // setupPickFirst performs steps required for pick_first tests. It starts a 52 // bunch of backends exporting the TestService, creates a ClientConn to them 53 // with service config specifying the use of the pick_first LB policy. 54 func setupPickFirst(t *testing.T, backendCount int, opts ...grpc.DialOption) (*grpc.ClientConn, *manual.Resolver, []*stubserver.StubServer) { 55 t.Helper() 56 57 r := manual.NewBuilderWithScheme("whatever") 58 59 backends := make([]*stubserver.StubServer, backendCount) 60 addrs := make([]resolver.Address, backendCount) 61 for i := 0; i < backendCount; i++ { 62 backend := &stubserver.StubServer{ 63 EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) { 64 return &testpb.Empty{}, nil 65 }, 66 } 67 if err := backend.StartServer(); err != nil { 68 t.Fatalf("Failed to start backend: %v", err) 69 } 70 t.Logf("Started TestService backend at: %q", backend.Address) 71 t.Cleanup(func() { backend.Stop() }) 72 73 backends[i] = backend 74 addrs[i] = resolver.Address{Addr: backend.Address} 75 } 76 77 dopts := []grpc.DialOption{ 78 grpc.WithTransportCredentials(insecure.NewCredentials()), 79 grpc.WithResolvers(r), 80 grpc.WithDefaultServiceConfig(pickFirstServiceConfig), 81 } 82 dopts = append(dopts, opts...) 83 cc, err := grpc.Dial(r.Scheme()+":///test.server", dopts...) 84 if err != nil { 85 t.Fatalf("grpc.Dial() failed: %v", err) 86 } 87 t.Cleanup(func() { cc.Close() }) 88 89 // At this point, the resolver has not returned any addresses to the channel. 90 // This RPC must block until the context expires. 91 sCtx, sCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout) 92 defer sCancel() 93 client := testgrpc.NewTestServiceClient(cc) 94 if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded { 95 t.Fatalf("EmptyCall() = %s, want %s", status.Code(err), codes.DeadlineExceeded) 96 } 97 return cc, r, backends 98 } 99 100 // stubBackendsToResolverAddrs converts from a set of stub server backends to 101 // resolver addresses. Useful when pushing addresses to the manual resolver. 102 func stubBackendsToResolverAddrs(backends []*stubserver.StubServer) []resolver.Address { 103 addrs := make([]resolver.Address, len(backends)) 104 for i, backend := range backends { 105 addrs[i] = resolver.Address{Addr: backend.Address} 106 } 107 return addrs 108 } 109 110 // TestPickFirst_OneBackend tests the most basic scenario for pick_first. It 111 // brings up a single backend and verifies that all RPCs get routed to it. 112 func (s) TestPickFirst_OneBackend(t *testing.T) { 113 cc, r, backends := setupPickFirst(t, 1) 114 115 addrs := stubBackendsToResolverAddrs(backends) 116 r.UpdateState(resolver.State{Addresses: addrs}) 117 118 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 119 defer cancel() 120 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 121 t.Fatal(err) 122 } 123 } 124 125 // TestPickFirst_MultipleBackends tests the scenario with multiple backends and 126 // verifies that all RPCs get routed to the first one. 127 func (s) TestPickFirst_MultipleBackends(t *testing.T) { 128 cc, r, backends := setupPickFirst(t, 2) 129 130 addrs := stubBackendsToResolverAddrs(backends) 131 r.UpdateState(resolver.State{Addresses: addrs}) 132 133 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 134 defer cancel() 135 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 136 t.Fatal(err) 137 } 138 } 139 140 // TestPickFirst_OneServerDown tests the scenario where we have multiple 141 // backends and pick_first is working as expected. Verifies that RPCs get routed 142 // to the next backend in the list when the first one goes down. 143 func (s) TestPickFirst_OneServerDown(t *testing.T) { 144 cc, r, backends := setupPickFirst(t, 2) 145 146 addrs := stubBackendsToResolverAddrs(backends) 147 r.UpdateState(resolver.State{Addresses: addrs}) 148 149 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 150 defer cancel() 151 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 152 t.Fatal(err) 153 } 154 155 // Stop the backend which is currently being used. RPCs should get routed to 156 // the next backend in the list. 157 backends[0].Stop() 158 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { 159 t.Fatal(err) 160 } 161 } 162 163 // TestPickFirst_AllServersDown tests the scenario where we have multiple 164 // backends and pick_first is working as expected. When all backends go down, 165 // the test verifies that RPCs fail with appropriate status code. 166 func (s) TestPickFirst_AllServersDown(t *testing.T) { 167 cc, r, backends := setupPickFirst(t, 2) 168 169 addrs := stubBackendsToResolverAddrs(backends) 170 r.UpdateState(resolver.State{Addresses: addrs}) 171 172 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 173 defer cancel() 174 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 175 t.Fatal(err) 176 } 177 178 for _, b := range backends { 179 b.Stop() 180 } 181 182 client := testgrpc.NewTestServiceClient(cc) 183 for { 184 if ctx.Err() != nil { 185 t.Fatalf("channel failed to move to Unavailable after all backends were stopped: %v", ctx.Err()) 186 } 187 if _, err := client.EmptyCall(ctx, &testpb.Empty{}); status.Code(err) == codes.Unavailable { 188 return 189 } 190 time.Sleep(defaultTestShortTimeout) 191 } 192 } 193 194 // TestPickFirst_AddressesRemoved tests the scenario where we have multiple 195 // backends and pick_first is working as expected. It then verifies that when 196 // addresses are removed by the name resolver, RPCs get routed appropriately. 197 func (s) TestPickFirst_AddressesRemoved(t *testing.T) { 198 cc, r, backends := setupPickFirst(t, 3) 199 200 addrs := stubBackendsToResolverAddrs(backends) 201 r.UpdateState(resolver.State{Addresses: addrs}) 202 203 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 204 defer cancel() 205 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 206 t.Fatal(err) 207 } 208 209 // Remove the first backend from the list of addresses originally pushed. 210 // RPCs should get routed to the first backend in the new list. 211 r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[1], addrs[2]}}) 212 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { 213 t.Fatal(err) 214 } 215 216 // Append the backend that we just removed to the end of the list. 217 // Nothing should change. 218 r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[1], addrs[2], addrs[0]}}) 219 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { 220 t.Fatal(err) 221 } 222 223 // Remove the first backend from the existing list of addresses. 224 // RPCs should get routed to the first backend in the new list. 225 r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[2], addrs[0]}}) 226 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[2]); err != nil { 227 t.Fatal(err) 228 } 229 230 // Remove the first backend from the existing list of addresses. 231 // RPCs should get routed to the first backend in the new list. 232 r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0]}}) 233 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 234 t.Fatal(err) 235 } 236 } 237 238 // TestPickFirst_NewAddressWhileBlocking tests the case where pick_first is 239 // configured on a channel, things are working as expected and then a resolver 240 // updates removes all addresses. An RPC attempted at this point in time will be 241 // blocked because there are no valid backends. This test verifies that when new 242 // backends are added, the RPC is able to complete. 243 func (s) TestPickFirst_NewAddressWhileBlocking(t *testing.T) { 244 cc, r, backends := setupPickFirst(t, 2) 245 addrs := stubBackendsToResolverAddrs(backends) 246 r.UpdateState(resolver.State{Addresses: addrs}) 247 248 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 249 defer cancel() 250 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 251 t.Fatal(err) 252 } 253 254 // Send a resolver update with no addresses. This should push the channel into 255 // TransientFailure. 256 r.UpdateState(resolver.State{}) 257 testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) 258 259 doneCh := make(chan struct{}) 260 client := testgrpc.NewTestServiceClient(cc) 261 go func() { 262 // The channel is currently in TransientFailure and this RPC will block 263 // until the channel becomes Ready, which will only happen when we push a 264 // resolver update with a valid backend address. 265 if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil { 266 t.Errorf("EmptyCall() = %v, want <nil>", err) 267 } 268 close(doneCh) 269 }() 270 271 // Make sure that there is one pending RPC on the ClientConn before attempting 272 // to push new addresses through the name resolver. If we don't do this, the 273 // resolver update can happen before the above goroutine gets to make the RPC. 274 for { 275 if err := ctx.Err(); err != nil { 276 t.Fatal(err) 277 } 278 tcs, _ := channelz.GetTopChannels(0, 0) 279 if len(tcs) != 1 { 280 t.Fatalf("there should only be one top channel, not %d", len(tcs)) 281 } 282 started := tcs[0].ChannelData.CallsStarted 283 completed := tcs[0].ChannelData.CallsSucceeded + tcs[0].ChannelData.CallsFailed 284 if (started - completed) == 1 { 285 break 286 } 287 time.Sleep(defaultTestShortTimeout) 288 } 289 290 // Send a resolver update with a valid backend to push the channel to Ready 291 // and unblock the above RPC. 292 r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: backends[0].Address}}}) 293 294 select { 295 case <-ctx.Done(): 296 t.Fatal("Timeout when waiting for blocked RPC to complete") 297 case <-doneCh: 298 } 299 } 300 301 // TestPickFirst_StickyTransientFailure tests the case where pick_first is 302 // configured on a channel, and the backend is configured to close incoming 303 // connections as soon as they are accepted. The test verifies that the channel 304 // enters TransientFailure and stays there. The test also verifies that the 305 // pick_first LB policy is constantly trying to reconnect to the backend. 306 func (s) TestPickFirst_StickyTransientFailure(t *testing.T) { 307 // Spin up a local server which closes the connection as soon as it receives 308 // one. It also sends a signal on a channel whenver it received a connection. 309 lis, err := testutils.LocalTCPListener() 310 if err != nil { 311 t.Fatalf("Failed to create listener: %v", err) 312 } 313 t.Cleanup(func() { lis.Close() }) 314 315 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 316 defer cancel() 317 connCh := make(chan struct{}, 1) 318 go func() { 319 for { 320 conn, err := lis.Accept() 321 if err != nil { 322 return 323 } 324 select { 325 case connCh <- struct{}{}: 326 conn.Close() 327 case <-ctx.Done(): 328 return 329 } 330 } 331 }() 332 333 // Dial the above server with a ConnectParams that does a constant backoff 334 // of defaultTestShortTimeout duration. 335 dopts := []grpc.DialOption{ 336 grpc.WithTransportCredentials(insecure.NewCredentials()), 337 grpc.WithDefaultServiceConfig(pickFirstServiceConfig), 338 grpc.WithConnectParams(grpc.ConnectParams{ 339 Backoff: backoff.Config{ 340 BaseDelay: defaultTestShortTimeout, 341 Multiplier: float64(0), 342 Jitter: float64(0), 343 MaxDelay: defaultTestShortTimeout, 344 }, 345 }), 346 } 347 cc, err := grpc.Dial(lis.Addr().String(), dopts...) 348 if err != nil { 349 t.Fatalf("Failed to dial server at %q: %v", lis.Addr(), err) 350 } 351 t.Cleanup(func() { cc.Close() }) 352 353 testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) 354 355 // Spawn a goroutine to ensure that the channel stays in TransientFailure. 356 // The call to cc.WaitForStateChange will return false when the main 357 // goroutine exits and the context is cancelled. 358 go func() { 359 if cc.WaitForStateChange(ctx, connectivity.TransientFailure) { 360 if state := cc.GetState(); state != connectivity.Shutdown { 361 t.Errorf("Unexpected state change from TransientFailure to %s", cc.GetState()) 362 } 363 } 364 }() 365 366 // Ensures that the pick_first LB policy is constantly trying to reconnect. 367 for i := 0; i < 10; i++ { 368 select { 369 case <-connCh: 370 case <-time.After(2 * defaultTestShortTimeout): 371 t.Error("Timeout when waiting for pick_first to reconnect") 372 } 373 } 374 } 375 376 // Tests the PF LB policy with shuffling enabled. 377 func (s) TestPickFirst_ShuffleAddressList(t *testing.T) { 378 const serviceConfig = `{"loadBalancingConfig": [{"pick_first":{ "shuffleAddressList": true }}]}` 379 380 // Install a shuffler that always reverses two entries. 381 origShuf := grpcrand.Shuffle 382 defer func() { grpcrand.Shuffle = origShuf }() 383 grpcrand.Shuffle = func(n int, f func(int, int)) { 384 if n != 2 { 385 t.Errorf("Shuffle called with n=%v; want 2", n) 386 return 387 } 388 f(0, 1) // reverse the two addresses 389 } 390 391 // Set up our backends. 392 cc, r, backends := setupPickFirst(t, 2) 393 addrs := stubBackendsToResolverAddrs(backends) 394 395 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 396 defer cancel() 397 398 // Push an update with both addresses and shuffling disabled. We should 399 // connect to backend 0. 400 r.UpdateState(resolver.State{Addresses: []resolver.Address{addrs[0], addrs[1]}}) 401 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 402 t.Fatal(err) 403 } 404 405 // Send a config with shuffling enabled. This will reverse the addresses, 406 // but the channel should still be connected to backend 0. 407 shufState := resolver.State{ 408 ServiceConfig: parseServiceConfig(t, r, serviceConfig), 409 Addresses: []resolver.Address{addrs[0], addrs[1]}, 410 } 411 r.UpdateState(shufState) 412 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 413 t.Fatal(err) 414 } 415 416 // Send a resolver update with no addresses. This should push the channel 417 // into TransientFailure. 418 r.UpdateState(resolver.State{}) 419 testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) 420 421 // Send the same config as last time with shuffling enabled. Since we are 422 // not connected to backend 0, we should connect to backend 1. 423 r.UpdateState(shufState) 424 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { 425 t.Fatal(err) 426 } 427 } 428 429 // Test config parsing with the env var turned on and off for various scenarios. 430 func (s) TestPickFirst_ParseConfig_Success(t *testing.T) { 431 // Install a shuffler that always reverses two entries. 432 origShuf := grpcrand.Shuffle 433 defer func() { grpcrand.Shuffle = origShuf }() 434 grpcrand.Shuffle = func(n int, f func(int, int)) { 435 if n != 2 { 436 t.Errorf("Shuffle called with n=%v; want 2", n) 437 return 438 } 439 f(0, 1) // reverse the two addresses 440 } 441 442 tests := []struct { 443 name string 444 serviceConfig string 445 wantFirstAddr bool 446 }{ 447 { 448 name: "empty pickfirst config", 449 serviceConfig: `{"loadBalancingConfig": [{"pick_first":{}}]}`, 450 wantFirstAddr: true, 451 }, 452 { 453 name: "empty good pickfirst config", 454 serviceConfig: `{"loadBalancingConfig": [{"pick_first":{ "shuffleAddressList": true }}]}`, 455 wantFirstAddr: false, 456 }, 457 } 458 459 for _, test := range tests { 460 t.Run(test.name, func(t *testing.T) { 461 // Set up our backends. 462 cc, r, backends := setupPickFirst(t, 2) 463 addrs := stubBackendsToResolverAddrs(backends) 464 465 r.UpdateState(resolver.State{ 466 ServiceConfig: parseServiceConfig(t, r, test.serviceConfig), 467 Addresses: addrs, 468 }) 469 470 // Some tests expect address shuffling to happen, and indicate that 471 // by setting wantFirstAddr to false (since our shuffling function 472 // defined at the top of this test, simply reverses the list of 473 // addresses provided to it). 474 wantAddr := addrs[0] 475 if !test.wantFirstAddr { 476 wantAddr = addrs[1] 477 } 478 479 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 480 defer cancel() 481 if err := pickfirst.CheckRPCsToBackend(ctx, cc, wantAddr); err != nil { 482 t.Fatal(err) 483 } 484 }) 485 } 486 } 487 488 // Test config parsing for a bad service config. 489 func (s) TestPickFirst_ParseConfig_Failure(t *testing.T) { 490 // Service config should fail with the below config. Name resolvers are 491 // expected to perform this parsing before they push the parsed service 492 // config to the channel. 493 const sc = `{"loadBalancingConfig": [{"pick_first":{ "shuffleAddressList": 666 }}]}` 494 scpr := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(sc) 495 if scpr.Err == nil { 496 t.Fatalf("ParseConfig() succeeded and returned %+v, when expected to fail", scpr) 497 } 498 } 499 500 // setupPickFirstWithListenerWrapper is very similar to setupPickFirst, but uses 501 // a wrapped listener that the test can use to track accepted connections. 502 func setupPickFirstWithListenerWrapper(t *testing.T, backendCount int, opts ...grpc.DialOption) (*grpc.ClientConn, *manual.Resolver, []*stubserver.StubServer, []*testutils.ListenerWrapper) { 503 t.Helper() 504 505 backends := make([]*stubserver.StubServer, backendCount) 506 addrs := make([]resolver.Address, backendCount) 507 listeners := make([]*testutils.ListenerWrapper, backendCount) 508 for i := 0; i < backendCount; i++ { 509 lis := testutils.NewListenerWrapper(t, nil) 510 backend := &stubserver.StubServer{ 511 Listener: lis, 512 EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) { 513 return &testpb.Empty{}, nil 514 }, 515 } 516 if err := backend.StartServer(); err != nil { 517 t.Fatalf("Failed to start backend: %v", err) 518 } 519 t.Logf("Started TestService backend at: %q", backend.Address) 520 t.Cleanup(func() { backend.Stop() }) 521 522 backends[i] = backend 523 addrs[i] = resolver.Address{Addr: backend.Address} 524 listeners[i] = lis 525 } 526 527 r := manual.NewBuilderWithScheme("whatever") 528 dopts := []grpc.DialOption{ 529 grpc.WithTransportCredentials(insecure.NewCredentials()), 530 grpc.WithResolvers(r), 531 grpc.WithDefaultServiceConfig(pickFirstServiceConfig), 532 } 533 dopts = append(dopts, opts...) 534 cc, err := grpc.Dial(r.Scheme()+":///test.server", dopts...) 535 if err != nil { 536 t.Fatalf("grpc.Dial() failed: %v", err) 537 } 538 t.Cleanup(func() { cc.Close() }) 539 540 // At this point, the resolver has not returned any addresses to the channel. 541 // This RPC must block until the context expires. 542 sCtx, sCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout) 543 defer sCancel() 544 client := testgrpc.NewTestServiceClient(cc) 545 if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); status.Code(err) != codes.DeadlineExceeded { 546 t.Fatalf("EmptyCall() = %s, want %s", status.Code(err), codes.DeadlineExceeded) 547 } 548 return cc, r, backends, listeners 549 } 550 551 // TestPickFirst_AddressUpdateWithAttributes tests the case where an address 552 // update received by the pick_first LB policy differs in attributes. Addresses 553 // which differ in attributes are considered different from the perspective of 554 // subconn creation and connection establishment and the test verifies that new 555 // connections are created when attributes change. 556 func (s) TestPickFirst_AddressUpdateWithAttributes(t *testing.T) { 557 cc, r, backends, listeners := setupPickFirstWithListenerWrapper(t, 2) 558 559 // Add a set of attributes to the addresses before pushing them to the 560 // pick_first LB policy through the manual resolver. 561 addrs := stubBackendsToResolverAddrs(backends) 562 for i := range addrs { 563 addrs[i].Attributes = addrs[i].Attributes.WithValue("test-attribute-1", fmt.Sprintf("%d", i)) 564 } 565 r.UpdateState(resolver.State{Addresses: addrs}) 566 567 // Ensure that RPCs succeed to the first backend in the list. 568 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 569 defer cancel() 570 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 571 t.Fatal(err) 572 } 573 574 // Grab the wrapped connection from the listener wrapper. This will be used 575 // to verify the connection is closed. 576 val, err := listeners[0].NewConnCh.Receive(ctx) 577 if err != nil { 578 t.Fatalf("Failed to receive new connection from wrapped listener: %v", err) 579 } 580 conn := val.(*testutils.ConnWrapper) 581 582 // Add another set of attributes to the addresses, and push them to the 583 // pick_first LB policy through the manual resolver. Leave the order of the 584 // addresses unchanged. 585 for i := range addrs { 586 addrs[i].Attributes = addrs[i].Attributes.WithValue("test-attribute-2", fmt.Sprintf("%d", i)) 587 } 588 r.UpdateState(resolver.State{Addresses: addrs}) 589 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 590 t.Fatal(err) 591 } 592 593 // A change in the address attributes results in the new address being 594 // considered different to the current address. This will result in the old 595 // connection being closed and a new connection to the same backend (since 596 // address order is not modified). 597 if _, err := conn.CloseCh.Receive(ctx); err != nil { 598 t.Fatalf("Timeout when expecting existing connection to be closed: %v", err) 599 } 600 val, err = listeners[0].NewConnCh.Receive(ctx) 601 if err != nil { 602 t.Fatalf("Failed to receive new connection from wrapped listener: %v", err) 603 } 604 conn = val.(*testutils.ConnWrapper) 605 606 // Add another set of attributes to the addresses, and push them to the 607 // pick_first LB policy through the manual resolver. Reverse of the order 608 // of addresses. 609 for i := range addrs { 610 addrs[i].Attributes = addrs[i].Attributes.WithValue("test-attribute-3", fmt.Sprintf("%d", i)) 611 } 612 addrs[0], addrs[1] = addrs[1], addrs[0] 613 r.UpdateState(resolver.State{Addresses: addrs}) 614 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 615 t.Fatal(err) 616 } 617 618 // Ensure that the old connection is closed and a new connection is 619 // established to the first address in the new list. 620 if _, err := conn.CloseCh.Receive(ctx); err != nil { 621 t.Fatalf("Timeout when expecting existing connection to be closed: %v", err) 622 } 623 _, err = listeners[1].NewConnCh.Receive(ctx) 624 if err != nil { 625 t.Fatalf("Failed to receive new connection from wrapped listener: %v", err) 626 } 627 } 628 629 // TestPickFirst_AddressUpdateWithBalancerAttributes tests the case where an 630 // address update received by the pick_first LB policy differs in balancer 631 // attributes, which are meant only for consumption by LB policies. In this 632 // case, the test verifies that new connections are not created when the address 633 // update only changes the balancer attributes. 634 func (s) TestPickFirst_AddressUpdateWithBalancerAttributes(t *testing.T) { 635 cc, r, backends, listeners := setupPickFirstWithListenerWrapper(t, 2) 636 637 // Add a set of balancer attributes to the addresses before pushing them to 638 // the pick_first LB policy through the manual resolver. 639 addrs := stubBackendsToResolverAddrs(backends) 640 for i := range addrs { 641 addrs[i].BalancerAttributes = addrs[i].BalancerAttributes.WithValue("test-attribute-1", fmt.Sprintf("%d", i)) 642 } 643 r.UpdateState(resolver.State{Addresses: addrs}) 644 645 // Ensure that RPCs succeed to the expected backend. 646 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 647 defer cancel() 648 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 649 t.Fatal(err) 650 } 651 652 // Grab the wrapped connection from the listener wrapper. This will be used 653 // to verify the connection is not closed. 654 val, err := listeners[0].NewConnCh.Receive(ctx) 655 if err != nil { 656 t.Fatalf("Failed to receive new connection from wrapped listener: %v", err) 657 } 658 conn := val.(*testutils.ConnWrapper) 659 660 // Add a set of balancer attributes to the addresses before pushing them to 661 // the pick_first LB policy through the manual resolver. Leave the order of 662 // the addresses unchanged. 663 for i := range addrs { 664 addrs[i].BalancerAttributes = addrs[i].BalancerAttributes.WithValue("test-attribute-2", fmt.Sprintf("%d", i)) 665 } 666 r.UpdateState(resolver.State{Addresses: addrs}) 667 668 // Ensure that no new connection is established, and ensure that the old 669 // connection is not closed. 670 for i := range listeners { 671 sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) 672 defer sCancel() 673 if _, err := listeners[i].NewConnCh.Receive(sCtx); err != context.DeadlineExceeded { 674 t.Fatalf("Unexpected error when expecting no new connection: %v", err) 675 } 676 } 677 sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) 678 defer sCancel() 679 if _, err := conn.CloseCh.Receive(sCtx); err != context.DeadlineExceeded { 680 t.Fatalf("Unexpected error when expecting existing connection to stay active: %v", err) 681 } 682 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 683 t.Fatal(err) 684 } 685 686 // Add a set of balancer attributes to the addresses before pushing them to 687 // the pick_first LB policy through the manual resolver. Reverse of the 688 // order of addresses. 689 for i := range addrs { 690 addrs[i].BalancerAttributes = addrs[i].BalancerAttributes.WithValue("test-attribute-3", fmt.Sprintf("%d", i)) 691 } 692 addrs[0], addrs[1] = addrs[1], addrs[0] 693 r.UpdateState(resolver.State{Addresses: addrs}) 694 695 // Ensure that no new connection is established, and ensure that the old 696 // connection is not closed. 697 for i := range listeners { 698 sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) 699 defer sCancel() 700 if _, err := listeners[i].NewConnCh.Receive(sCtx); err != context.DeadlineExceeded { 701 t.Fatalf("Unexpected error when expecting no new connection: %v", err) 702 } 703 } 704 sCtx, sCancel = context.WithTimeout(ctx, defaultTestShortTimeout) 705 defer sCancel() 706 if _, err := conn.CloseCh.Receive(sCtx); err != context.DeadlineExceeded { 707 t.Fatalf("Unexpected error when expecting existing connection to stay active: %v", err) 708 } 709 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[1]); err != nil { 710 t.Fatal(err) 711 } 712 } 713 714 // Tests the case where the pick_first LB policy receives an error from the name 715 // resolver without previously receiving a good update. Verifies that the 716 // channel moves to TRANSIENT_FAILURE and that error received from the name 717 // resolver is propagated to the caller of an RPC. 718 func (s) TestPickFirst_ResolverError_NoPreviousUpdate(t *testing.T) { 719 cc, r, _ := setupPickFirst(t, 0) 720 721 nrErr := errors.New("error from name resolver") 722 r.ReportError(nrErr) 723 724 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 725 defer cancel() 726 testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) 727 728 client := testgrpc.NewTestServiceClient(cc) 729 _, err := client.EmptyCall(ctx, &testpb.Empty{}) 730 if err == nil { 731 t.Fatalf("EmptyCall() succeeded when expected to fail with error: %v", nrErr) 732 } 733 if !strings.Contains(err.Error(), nrErr.Error()) { 734 t.Fatalf("EmptyCall() failed with error: %v, want error: %v", err, nrErr) 735 } 736 } 737 738 // Tests the case where the pick_first LB policy receives an error from the name 739 // resolver after receiving a good update (and the channel is currently READY). 740 // The test verifies that the channel continues to use the previously received 741 // good update. 742 func (s) TestPickFirst_ResolverError_WithPreviousUpdate_Ready(t *testing.T) { 743 cc, r, backends := setupPickFirst(t, 1) 744 745 addrs := stubBackendsToResolverAddrs(backends) 746 r.UpdateState(resolver.State{Addresses: addrs}) 747 748 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 749 defer cancel() 750 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 751 t.Fatal(err) 752 } 753 754 nrErr := errors.New("error from name resolver") 755 r.ReportError(nrErr) 756 757 // Ensure that RPCs continue to succeed for the next second. 758 client := testgrpc.NewTestServiceClient(cc) 759 for end := time.Now().Add(time.Second); time.Now().Before(end); <-time.After(defaultTestShortTimeout) { 760 if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil { 761 t.Fatalf("EmptyCall() failed: %v", err) 762 } 763 } 764 } 765 766 // Tests the case where the pick_first LB policy receives an error from the name 767 // resolver after receiving a good update (and the channel is currently in 768 // CONNECTING state). The test verifies that the channel continues to use the 769 // previously received good update, and that RPCs don't fail with the error 770 // received from the name resolver. 771 func (s) TestPickFirst_ResolverError_WithPreviousUpdate_Connecting(t *testing.T) { 772 lis, err := testutils.LocalTCPListener() 773 if err != nil { 774 t.Fatalf("net.Listen() failed: %v", err) 775 } 776 777 // Listen on a local port and act like a server that blocks until the 778 // channel reaches CONNECTING and closes the connection without sending a 779 // server preface. 780 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 781 defer cancel() 782 waitForConnecting := make(chan struct{}) 783 go func() { 784 conn, err := lis.Accept() 785 if err != nil { 786 t.Errorf("Unexpected error when accepting a connection: %v", err) 787 } 788 defer conn.Close() 789 790 select { 791 case <-waitForConnecting: 792 case <-ctx.Done(): 793 t.Error("Timeout when waiting for channel to move to CONNECTING state") 794 } 795 }() 796 797 r := manual.NewBuilderWithScheme("whatever") 798 dopts := []grpc.DialOption{ 799 grpc.WithTransportCredentials(insecure.NewCredentials()), 800 grpc.WithResolvers(r), 801 grpc.WithDefaultServiceConfig(pickFirstServiceConfig), 802 } 803 cc, err := grpc.Dial(r.Scheme()+":///test.server", dopts...) 804 if err != nil { 805 t.Fatalf("grpc.Dial() failed: %v", err) 806 } 807 t.Cleanup(func() { cc.Close() }) 808 809 addrs := []resolver.Address{{Addr: lis.Addr().String()}} 810 r.UpdateState(resolver.State{Addresses: addrs}) 811 testutils.AwaitState(ctx, t, cc, connectivity.Connecting) 812 813 nrErr := errors.New("error from name resolver") 814 r.ReportError(nrErr) 815 816 // RPCs should fail with deadline exceed error as long as they are in 817 // CONNECTING and not the error returned by the name resolver. 818 client := testgrpc.NewTestServiceClient(cc) 819 sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) 820 defer sCancel() 821 if _, err := client.EmptyCall(sCtx, &testpb.Empty{}); !strings.Contains(err.Error(), context.DeadlineExceeded.Error()) { 822 t.Fatalf("EmptyCall() failed with error: %v, want error: %v", err, context.DeadlineExceeded) 823 } 824 825 // Closing this channel leads to closing of the connection by our listener. 826 // gRPC should see this as a connection error. 827 close(waitForConnecting) 828 testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) 829 checkForConnectionError(ctx, t, cc) 830 } 831 832 // Tests the case where the pick_first LB policy receives an error from the name 833 // resolver after receiving a good update. The previous good update though has 834 // seen the channel move to TRANSIENT_FAILURE. The test verifies that the 835 // channel fails RPCs with the new error from the resolver. 836 func (s) TestPickFirst_ResolverError_WithPreviousUpdate_TransientFailure(t *testing.T) { 837 lis, err := testutils.LocalTCPListener() 838 if err != nil { 839 t.Fatalf("net.Listen() failed: %v", err) 840 } 841 842 // Listen on a local port and act like a server that closes the connection 843 // without sending a server preface. 844 go func() { 845 conn, err := lis.Accept() 846 if err != nil { 847 t.Errorf("Unexpected error when accepting a connection: %v", err) 848 } 849 conn.Close() 850 }() 851 852 r := manual.NewBuilderWithScheme("whatever") 853 dopts := []grpc.DialOption{ 854 grpc.WithTransportCredentials(insecure.NewCredentials()), 855 grpc.WithResolvers(r), 856 grpc.WithDefaultServiceConfig(pickFirstServiceConfig), 857 } 858 cc, err := grpc.Dial(r.Scheme()+":///test.server", dopts...) 859 if err != nil { 860 t.Fatalf("grpc.Dial() failed: %v", err) 861 } 862 t.Cleanup(func() { cc.Close() }) 863 864 addrs := []resolver.Address{{Addr: lis.Addr().String()}} 865 r.UpdateState(resolver.State{Addresses: addrs}) 866 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 867 defer cancel() 868 testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) 869 checkForConnectionError(ctx, t, cc) 870 871 // An error from the name resolver should result in RPCs failing with that 872 // error instead of the old error that caused the channel to move to 873 // TRANSIENT_FAILURE in the first place. 874 nrErr := errors.New("error from name resolver") 875 r.ReportError(nrErr) 876 client := testgrpc.NewTestServiceClient(cc) 877 for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) { 878 if _, err := client.EmptyCall(ctx, &testpb.Empty{}); strings.Contains(err.Error(), nrErr.Error()) { 879 break 880 } 881 } 882 if ctx.Err() != nil { 883 t.Fatal("Timeout when waiting for RPCs to fail with error returned by the name resolver") 884 } 885 } 886 887 func checkForConnectionError(ctx context.Context, t *testing.T, cc *grpc.ClientConn) { 888 t.Helper() 889 890 // RPCs may fail on the client side in two ways, once the fake server closes 891 // the accepted connection: 892 // - writing the client preface succeeds, but not reading the server preface 893 // - writing the client preface fails 894 // In either case, we should see it fail with UNAVAILABLE. 895 client := testgrpc.NewTestServiceClient(cc) 896 if _, err := client.EmptyCall(ctx, &testpb.Empty{}); status.Code(err) != codes.Unavailable { 897 t.Fatalf("EmptyCall() failed with error: %v, want code %v", err, codes.Unavailable) 898 } 899 } 900 901 // Tests the case where the pick_first LB policy receives an update from the 902 // name resolver with no addresses after receiving a good update. The test 903 // verifies that the channel fails RPCs with an error indicating the fact that 904 // the name resolver returned no addresses. 905 func (s) TestPickFirst_ResolverError_ZeroAddresses_WithPreviousUpdate(t *testing.T) { 906 cc, r, backends := setupPickFirst(t, 1) 907 908 addrs := stubBackendsToResolverAddrs(backends) 909 r.UpdateState(resolver.State{Addresses: addrs}) 910 911 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 912 defer cancel() 913 if err := pickfirst.CheckRPCsToBackend(ctx, cc, addrs[0]); err != nil { 914 t.Fatal(err) 915 } 916 917 r.UpdateState(resolver.State{}) 918 wantErr := "produced zero addresses" 919 client := testgrpc.NewTestServiceClient(cc) 920 for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) { 921 if _, err := client.EmptyCall(ctx, &testpb.Empty{}); strings.Contains(err.Error(), wantErr) { 922 break 923 } 924 } 925 if ctx.Err() != nil { 926 t.Fatal("Timeout when waiting for RPCs to fail with error returned by the name resolver") 927 } 928 }