/*
 *
 * Copyright 2018 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package test

import (
	"context"
	"fmt"
	"net"
	"sync"
	"testing"
	"time"

	"golang.org/x/net/http2"
	"google.golang.org/grpc"
	"google.golang.org/grpc/backoff"
	"google.golang.org/grpc/balancer"
	"google.golang.org/grpc/connectivity"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/internal"
	"google.golang.org/grpc/internal/balancer/stub"
	"google.golang.org/grpc/internal/envconfig"
	"google.golang.org/grpc/internal/grpcsync"
	"google.golang.org/grpc/internal/stubserver"
	"google.golang.org/grpc/internal/testutils"
	testgrpc "google.golang.org/grpc/interop/grpc_testing"
	testpb "google.golang.org/grpc/interop/grpc_testing"
	"google.golang.org/grpc/resolver"
	"google.golang.org/grpc/resolver/manual"
)

// stateRecordingBalancerName is the name under which the state recording
// balancer builder is registered. Tests in this file reference it from their
// default service config to select that balancer.
const stateRecordingBalancerName = "state_recording_balancer"

// testBalancerBuilder is the single builder instance registered in init(). It
// is shared by the tests in this file, which call nextStateNotifier() on it to
// obtain the state channel of the most recently built balancer.
var testBalancerBuilder = newStateRecordingBalancerBuilder()

// init registers the state recording balancer builder so that it can be
// selected by name through a service config.
func init() {
	balancer.Register(testBalancerBuilder)
}

// These tests use a pipeListener. This listener is similar to net.Listener
// except that it is unbuffered, so each read and write will wait for the other
// side's corresponding write or read.
58 func (s) TestStateTransitions_SingleAddress(t *testing.T) { 59 for _, test := range []struct { 60 desc string 61 want []connectivity.State 62 server func(net.Listener) net.Conn 63 }{ 64 { 65 desc: "When the server returns server preface, the client enters READY.", 66 want: []connectivity.State{ 67 connectivity.Connecting, 68 connectivity.Ready, 69 }, 70 server: func(lis net.Listener) net.Conn { 71 conn, err := lis.Accept() 72 if err != nil { 73 t.Error(err) 74 return nil 75 } 76 77 go keepReading(conn) 78 79 framer := http2.NewFramer(conn, conn) 80 if err := framer.WriteSettings(http2.Setting{}); err != nil { 81 t.Errorf("Error while writing settings frame. %v", err) 82 return nil 83 } 84 85 return conn 86 }, 87 }, 88 { 89 desc: "When the connection is closed before the preface is sent, the client enters TRANSIENT FAILURE.", 90 want: []connectivity.State{ 91 connectivity.Connecting, 92 connectivity.TransientFailure, 93 }, 94 server: func(lis net.Listener) net.Conn { 95 conn, err := lis.Accept() 96 if err != nil { 97 t.Error(err) 98 return nil 99 } 100 101 conn.Close() 102 return nil 103 }, 104 }, 105 { 106 desc: `When the server sends its connection preface, but the connection dies before the client can write its 107 connection preface, the client enters TRANSIENT FAILURE.`, 108 want: []connectivity.State{ 109 connectivity.Connecting, 110 connectivity.TransientFailure, 111 }, 112 server: func(lis net.Listener) net.Conn { 113 conn, err := lis.Accept() 114 if err != nil { 115 t.Error(err) 116 return nil 117 } 118 119 framer := http2.NewFramer(conn, conn) 120 if err := framer.WriteSettings(http2.Setting{}); err != nil { 121 t.Errorf("Error while writing settings frame. 
%v", err) 122 return nil 123 } 124 125 conn.Close() 126 return nil 127 }, 128 }, 129 { 130 desc: `When the server reads the client connection preface but does not send its connection preface, the 131 client enters TRANSIENT FAILURE.`, 132 want: []connectivity.State{ 133 connectivity.Connecting, 134 connectivity.TransientFailure, 135 }, 136 server: func(lis net.Listener) net.Conn { 137 conn, err := lis.Accept() 138 if err != nil { 139 t.Error(err) 140 return nil 141 } 142 143 go keepReading(conn) 144 145 return conn 146 }, 147 }, 148 } { 149 t.Log(test.desc) 150 testStateTransitionSingleAddress(t, test.want, test.server) 151 } 152 } 153 154 func testStateTransitionSingleAddress(t *testing.T, want []connectivity.State, server func(net.Listener) net.Conn) { 155 pl := testutils.NewPipeListener() 156 defer pl.Close() 157 158 // Launch the server. 159 var conn net.Conn 160 var connMu sync.Mutex 161 go func() { 162 connMu.Lock() 163 conn = server(pl) 164 connMu.Unlock() 165 }() 166 167 client, err := grpc.NewClient("passthrough:///", 168 grpc.WithTransportCredentials(insecure.NewCredentials()), 169 grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, stateRecordingBalancerName)), 170 grpc.WithDialer(pl.Dialer()), 171 grpc.WithConnectParams(grpc.ConnectParams{ 172 Backoff: backoff.Config{}, 173 MinConnectTimeout: 100 * time.Millisecond, 174 })) 175 if err != nil { 176 t.Fatal(err) 177 } 178 defer client.Close() 179 180 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 181 defer cancel() 182 go testutils.StayConnected(ctx, client) 183 184 // Wait for the test balancer to be built before capturing it's state 185 // notification channel. 
186 testutils.AwaitNotState(ctx, t, client, connectivity.Idle) 187 stateNotifications := testBalancerBuilder.nextStateNotifier() 188 for i := 0; i < len(want); i++ { 189 select { 190 case <-time.After(defaultTestTimeout): 191 t.Fatalf("timed out waiting for state %d (%v) in flow %v", i, want[i], want) 192 case seen := <-stateNotifications: 193 if seen != want[i] { 194 t.Fatalf("expected to see %v at position %d in flow %v, got %v", want[i], i, want, seen) 195 } 196 } 197 } 198 199 connMu.Lock() 200 defer connMu.Unlock() 201 if conn != nil { 202 err = conn.Close() 203 if err != nil { 204 t.Fatal(err) 205 } 206 } 207 } 208 209 // When a READY connection is closed, the client enters IDLE then CONNECTING. 210 func (s) TestStateTransitions_ReadyToConnecting(t *testing.T) { 211 lis, err := net.Listen("tcp", "localhost:0") 212 if err != nil { 213 t.Fatalf("Error while listening. Err: %v", err) 214 } 215 defer lis.Close() 216 217 sawReady := make(chan struct{}, 1) 218 defer close(sawReady) 219 220 // Launch the server. 221 go func() { 222 conn, err := lis.Accept() 223 if err != nil { 224 t.Error(err) 225 return 226 } 227 228 go keepReading(conn) 229 230 framer := http2.NewFramer(conn, conn) 231 if err := framer.WriteSettings(http2.Setting{}); err != nil { 232 t.Errorf("Error while writing settings frame. %v", err) 233 return 234 } 235 236 // Prevents race between onPrefaceReceipt and onClose. 
237 <-sawReady 238 239 conn.Close() 240 }() 241 242 client, err := grpc.NewClient(lis.Addr().String(), 243 grpc.WithTransportCredentials(insecure.NewCredentials()), 244 grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, stateRecordingBalancerName))) 245 if err != nil { 246 t.Fatal(err) 247 } 248 defer client.Close() 249 250 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 251 defer cancel() 252 go testutils.StayConnected(ctx, client) 253 testutils.AwaitNotState(ctx, t, client, connectivity.Idle) 254 stateNotifications := testBalancerBuilder.nextStateNotifier() 255 256 want := []connectivity.State{ 257 connectivity.Connecting, 258 connectivity.Ready, 259 connectivity.Idle, 260 connectivity.Connecting, 261 } 262 for i := 0; i < len(want); i++ { 263 select { 264 case <-time.After(defaultTestTimeout): 265 t.Fatalf("timed out waiting for state %d (%v) in flow %v", i, want[i], want) 266 case seen := <-stateNotifications: 267 if seen == connectivity.Ready { 268 sawReady <- struct{}{} 269 } 270 if seen != want[i] { 271 t.Fatalf("expected to see %v at position %d in flow %v, got %v", want[i], i, want, seen) 272 } 273 } 274 } 275 } 276 277 // When the first connection is closed, the client stays in CONNECTING until it 278 // tries the second address (which succeeds, and then it enters READY). 279 func (s) TestStateTransitions_TriesAllAddrsBeforeTransientFailure(t *testing.T) { 280 lis1, err := net.Listen("tcp", "localhost:0") 281 if err != nil { 282 t.Fatalf("Error while listening. Err: %v", err) 283 } 284 defer lis1.Close() 285 286 lis2, err := net.Listen("tcp", "localhost:0") 287 if err != nil { 288 t.Fatalf("Error while listening. Err: %v", err) 289 } 290 defer lis2.Close() 291 292 server1Done := make(chan struct{}) 293 server2Done := make(chan struct{}) 294 295 // Launch server 1. 
296 go func() { 297 conn, err := lis1.Accept() 298 if err != nil { 299 t.Error(err) 300 return 301 } 302 303 conn.Close() 304 close(server1Done) 305 }() 306 // Launch server 2. 307 go func() { 308 conn, err := lis2.Accept() 309 if err != nil { 310 t.Error(err) 311 return 312 } 313 314 go keepReading(conn) 315 316 framer := http2.NewFramer(conn, conn) 317 if err := framer.WriteSettings(http2.Setting{}); err != nil { 318 t.Errorf("Error while writing settings frame. %v", err) 319 return 320 } 321 322 close(server2Done) 323 }() 324 325 rb := manual.NewBuilderWithScheme("whatever") 326 rb.InitialState(resolver.State{Addresses: []resolver.Address{ 327 {Addr: lis1.Addr().String()}, 328 {Addr: lis2.Addr().String()}, 329 }}) 330 client, err := grpc.NewClient("whatever:///this-gets-overwritten", 331 grpc.WithTransportCredentials(insecure.NewCredentials()), 332 grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, stateRecordingBalancerName)), 333 grpc.WithConnectParams(grpc.ConnectParams{ 334 // Set a really long back-off delay to ensure the first subConn does 335 // not enter IDLE before the second subConn connects. 336 Backoff: backoff.Config{ 337 BaseDelay: 1 * time.Hour, 338 }, 339 }), 340 grpc.WithResolvers(rb)) 341 if err != nil { 342 t.Fatal(err) 343 } 344 defer client.Close() 345 client.Connect() 346 stateNotifications := testBalancerBuilder.nextStateNotifier() 347 want := []connectivity.State{ 348 connectivity.Connecting, 349 connectivity.Ready, 350 } 351 if envconfig.NewPickFirstEnabled { 352 want = []connectivity.State{ 353 // The first subconn fails. 354 connectivity.Connecting, 355 connectivity.TransientFailure, 356 // The second subconn connects. 
357 connectivity.Connecting, 358 connectivity.Ready, 359 } 360 } 361 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 362 defer cancel() 363 for i := 0; i < len(want); i++ { 364 select { 365 case <-ctx.Done(): 366 t.Fatalf("timed out waiting for state %d (%v) in flow %v", i, want[i], want) 367 case seen := <-stateNotifications: 368 if seen != want[i] { 369 t.Fatalf("expected to see %v at position %d in flow %v, got %v", want[i], i, want, seen) 370 } 371 } 372 } 373 select { 374 case <-ctx.Done(): 375 t.Fatal("saw the correct state transitions, but timed out waiting for client to finish interactions with server 1") 376 case <-server1Done: 377 } 378 select { 379 case <-ctx.Done(): 380 t.Fatal("saw the correct state transitions, but timed out waiting for client to finish interactions with server 2") 381 case <-server2Done: 382 } 383 } 384 385 // When there are multiple addresses, and we enter READY on one of them, a 386 // later closure should cause the client to enter CONNECTING 387 func (s) TestStateTransitions_MultipleAddrsEntersReady(t *testing.T) { 388 lis1, err := net.Listen("tcp", "localhost:0") 389 if err != nil { 390 t.Fatalf("Error while listening. Err: %v", err) 391 } 392 defer lis1.Close() 393 394 // Never actually gets used; we just want it to be alive so that the resolver has two addresses to target. 395 lis2, err := net.Listen("tcp", "localhost:0") 396 if err != nil { 397 t.Fatalf("Error while listening. Err: %v", err) 398 } 399 defer lis2.Close() 400 401 server1Done := make(chan struct{}) 402 sawReady := make(chan struct{}, 1) 403 defer close(sawReady) 404 405 // Launch server 1. 406 go func() { 407 conn, err := lis1.Accept() 408 if err != nil { 409 t.Error(err) 410 return 411 } 412 413 go keepReading(conn) 414 415 framer := http2.NewFramer(conn, conn) 416 if err := framer.WriteSettings(http2.Setting{}); err != nil { 417 t.Errorf("Error while writing settings frame. 
%v", err) 418 return 419 } 420 421 <-sawReady 422 423 conn.Close() 424 425 close(server1Done) 426 }() 427 428 rb := manual.NewBuilderWithScheme("whatever") 429 rb.InitialState(resolver.State{Addresses: []resolver.Address{ 430 {Addr: lis1.Addr().String()}, 431 {Addr: lis2.Addr().String()}, 432 }}) 433 client, err := grpc.NewClient("whatever:///this-gets-overwritten", 434 grpc.WithTransportCredentials(insecure.NewCredentials()), 435 grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, stateRecordingBalancerName)), 436 grpc.WithResolvers(rb)) 437 if err != nil { 438 t.Fatal(err) 439 } 440 defer client.Close() 441 client.Connect() 442 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 443 defer cancel() 444 go testutils.StayConnected(ctx, client) 445 446 stateNotifications := testBalancerBuilder.nextStateNotifier() 447 want := []connectivity.State{ 448 connectivity.Connecting, 449 connectivity.Ready, 450 connectivity.Idle, 451 connectivity.Connecting, 452 } 453 for i := 0; i < len(want); i++ { 454 select { 455 case <-ctx.Done(): 456 t.Fatalf("timed out waiting for state %d (%v) in flow %v", i, want[i], want) 457 case seen := <-stateNotifications: 458 if seen == connectivity.Ready { 459 sawReady <- struct{}{} 460 } 461 if seen != want[i] { 462 t.Fatalf("expected to see %v at position %d in flow %v, got %v", want[i], i, want, seen) 463 } 464 } 465 } 466 select { 467 case <-ctx.Done(): 468 t.Fatal("saw the correct state transitions, but timed out waiting for client to finish interactions with server 1") 469 case <-server1Done: 470 } 471 } 472 473 type stateRecordingBalancer struct { 474 balancer.Balancer 475 } 476 477 func (b *stateRecordingBalancer) Close() { 478 b.Balancer.Close() 479 } 480 481 type stateRecordingBalancerBuilder struct { 482 mu sync.Mutex 483 notifier chan connectivity.State // The notifier used in the last Balancer. 
484 } 485 486 func newStateRecordingBalancerBuilder() *stateRecordingBalancerBuilder { 487 return &stateRecordingBalancerBuilder{} 488 } 489 490 func (b *stateRecordingBalancerBuilder) Name() string { 491 return stateRecordingBalancerName 492 } 493 494 func (b *stateRecordingBalancerBuilder) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { 495 stateNotifications := make(chan connectivity.State, 10) 496 b.mu.Lock() 497 b.notifier = stateNotifications 498 b.mu.Unlock() 499 return &stateRecordingBalancer{ 500 Balancer: balancer.Get("pick_first").Build(&stateRecordingCCWrapper{cc, stateNotifications}, opts), 501 } 502 } 503 504 func (b *stateRecordingBalancerBuilder) nextStateNotifier() <-chan connectivity.State { 505 b.mu.Lock() 506 defer b.mu.Unlock() 507 ret := b.notifier 508 b.notifier = nil 509 return ret 510 } 511 512 type stateRecordingCCWrapper struct { 513 balancer.ClientConn 514 notifier chan<- connectivity.State 515 } 516 517 func (ccw *stateRecordingCCWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) { 518 oldListener := opts.StateListener 519 opts.StateListener = func(s balancer.SubConnState) { 520 ccw.notifier <- s.ConnectivityState 521 oldListener(s) 522 } 523 return ccw.ClientConn.NewSubConn(addrs, opts) 524 } 525 526 // Keep reading until something causes the connection to die (EOF, server 527 // closed, etc). Useful as a tool for mindlessly keeping the connection 528 // healthy, since the client will error if things like client prefaces are not 529 // accepted in a timely fashion. 
530 func keepReading(conn net.Conn) { 531 buf := make([]byte, 1024) 532 for _, err := conn.Read(buf); err == nil; _, err = conn.Read(buf) { 533 } 534 } 535 536 type funcConnectivityStateSubscriber struct { 537 onMsg func(connectivity.State) 538 } 539 540 func (f *funcConnectivityStateSubscriber) OnMessage(msg any) { 541 f.onMsg(msg.(connectivity.State)) 542 } 543 544 // TestConnectivityStateSubscriber confirms updates sent by the balancer in 545 // rapid succession are not missed by the subscriber. 546 func (s) TestConnectivityStateSubscriber(t *testing.T) { 547 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 548 defer cancel() 549 550 sendStates := []connectivity.State{ 551 connectivity.Connecting, 552 connectivity.Ready, 553 connectivity.Idle, 554 connectivity.Connecting, 555 connectivity.Idle, 556 connectivity.Connecting, 557 connectivity.Ready, 558 } 559 wantStates := append(sendStates, connectivity.Shutdown) 560 561 const testBalName = "any" 562 bf := stub.BalancerFuncs{ 563 UpdateClientConnState: func(bd *stub.BalancerData, _ balancer.ClientConnState) error { 564 // Send the expected states in rapid succession. 565 for _, s := range sendStates { 566 t.Logf("Sending state update %s", s) 567 bd.ClientConn.UpdateState(balancer.State{ConnectivityState: s}) 568 } 569 return nil 570 }, 571 } 572 stub.Register(testBalName, bf) 573 574 // Create the ClientConn. 575 const testResName = "any" 576 rb := manual.NewBuilderWithScheme(testResName) 577 cc, err := grpc.NewClient(testResName+":///", 578 grpc.WithResolvers(rb), 579 grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, testBalName)), 580 grpc.WithTransportCredentials(insecure.NewCredentials()), 581 ) 582 if err != nil { 583 t.Fatalf("grpc.NewClient() failed: %v", err) 584 } 585 cc.Connect() 586 // Subscribe to state updates. Use a buffer size of 1 to allow the 587 // Shutdown state to go into the channel when Close()ing. 
588 connCh := make(chan connectivity.State, 1) 589 s := &funcConnectivityStateSubscriber{ 590 onMsg: func(s connectivity.State) { 591 select { 592 case connCh <- s: 593 case <-ctx.Done(): 594 } 595 if s == connectivity.Shutdown { 596 close(connCh) 597 } 598 }, 599 } 600 601 internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, s) 602 603 // Send an update from the resolver that will trigger the LB policy's UpdateClientConnState. 604 go rb.UpdateState(resolver.State{}) 605 606 // Verify the resulting states. 607 for i, want := range wantStates { 608 if i == len(sendStates) { 609 // Trigger Shutdown to be sent by the channel. Use a goroutine to 610 // ensure the operation does not block. 611 cc.Close() 612 } 613 select { 614 case got := <-connCh: 615 if got != want { 616 t.Errorf("Update %v was %s; want %s", i, got, want) 617 } else { 618 t.Logf("Update %v was %s as expected", i, got) 619 } 620 case <-ctx.Done(): 621 t.Fatalf("Timed out waiting for state update %v: %s", i, want) 622 } 623 } 624 } 625 626 // TestChannelStateWaitingForFirstResolverUpdate verifies the initial 627 // state of the channel when a manual name resolver doesn't provide any updates. 628 func (s) TestChannelStateWaitingForFirstResolverUpdate(t *testing.T) { 629 t.Skip("The channel remains in IDLE until the LB policy updates the state to CONNECTING. This is a bug and the channel should transition to CONNECTING as soon as Connect() is called. 
See issue #7686.") 630 631 backend := stubserver.StartTestService(t, nil) 632 defer backend.Stop() 633 634 mr := manual.NewBuilderWithScheme("e2e-test") 635 defer mr.Close() 636 637 cc, err := grpc.NewClient(mr.Scheme()+":///", grpc.WithResolvers(mr), grpc.WithTransportCredentials(insecure.NewCredentials())) 638 if err != nil { 639 t.Fatalf("Failed to create new client: %v", err) 640 } 641 defer cc.Close() 642 643 if state := cc.GetState(); state != connectivity.Idle { 644 t.Fatalf("Expected initial state to be IDLE, got %v", state) 645 } 646 647 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 648 defer cancel() 649 650 // The channel should transition to CONNECTING automatically when Connect() 651 // is called. 652 cc.Connect() 653 testutils.AwaitState(ctx, t, cc, connectivity.Connecting) 654 655 // Verify that the channel remains in CONNECTING state for a short time. 656 shortCtx, shortCancel := context.WithTimeout(ctx, defaultTestShortTimeout) 657 defer shortCancel() 658 testutils.AwaitNoStateChange(shortCtx, t, cc, connectivity.Connecting) 659 } 660 661 func (s) TestChannelStateTransitionWithRPC(t *testing.T) { 662 t.Skip("The channel remains in IDLE until the LB policy updates the state to CONNECTING. This is a bug and the channel should transition to CONNECTING as soon as an RPC call is made. 
See issue #7686.") 663 664 backend := stubserver.StartTestService(t, nil) 665 defer backend.Stop() 666 667 mr := manual.NewBuilderWithScheme("e2e-test") 668 defer mr.Close() 669 670 cc, err := grpc.NewClient(mr.Scheme()+":///", grpc.WithResolvers(mr), grpc.WithTransportCredentials(insecure.NewCredentials())) 671 if err != nil { 672 t.Fatalf("Failed to create new client: %v", err) 673 } 674 defer cc.Close() 675 676 if state := cc.GetState(); state != connectivity.Idle { 677 t.Fatalf("Expected initial state to be IDLE, got %v", state) 678 } 679 680 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 681 defer cancel() 682 683 // Make an RPC call to transition the channel to CONNECTING. 684 go func() { 685 _, err := testgrpc.NewTestServiceClient(cc).EmptyCall(ctx, &testpb.Empty{}) 686 if err == nil { 687 t.Errorf("Expected RPC to fail, but it succeeded") 688 } 689 }() 690 691 // The channel should transition to CONNECTING automatically when an RPC 692 // is made. 693 testutils.AwaitState(ctx, t, cc, connectivity.Connecting) 694 695 // The channel remains in CONNECTING state for a short time. 696 shortCtx, shortCancel := context.WithTimeout(ctx, defaultTestShortTimeout) 697 defer shortCancel() 698 testutils.AwaitNoStateChange(shortCtx, t, cc, connectivity.Connecting) 699 }