google.golang.org/grpc@v1.62.1/test/channelz_test.go (about) 1 /* 2 * 3 * Copyright 2018 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 package test 20 21 import ( 22 "context" 23 "crypto/tls" 24 "fmt" 25 "net" 26 "regexp" 27 "strings" 28 "sync" 29 "testing" 30 "time" 31 32 "github.com/google/go-cmp/cmp" 33 "golang.org/x/net/http2" 34 "google.golang.org/grpc" 35 _ "google.golang.org/grpc/balancer/grpclb" 36 grpclbstate "google.golang.org/grpc/balancer/grpclb/state" 37 "google.golang.org/grpc/balancer/roundrobin" 38 "google.golang.org/grpc/codes" 39 "google.golang.org/grpc/connectivity" 40 "google.golang.org/grpc/credentials" 41 "google.golang.org/grpc/internal" 42 "google.golang.org/grpc/internal/channelz" 43 "google.golang.org/grpc/internal/stubserver" 44 "google.golang.org/grpc/internal/testutils" 45 "google.golang.org/grpc/keepalive" 46 "google.golang.org/grpc/resolver" 47 "google.golang.org/grpc/resolver/manual" 48 "google.golang.org/grpc/status" 49 "google.golang.org/grpc/testdata" 50 51 testgrpc "google.golang.org/grpc/interop/grpc_testing" 52 testpb "google.golang.org/grpc/interop/grpc_testing" 53 ) 54 55 func verifyResultWithDelay(f func() (bool, error)) error { 56 var ok bool 57 var err error 58 for i := 0; i < 1000; i++ { 59 if ok, err = f(); ok { 60 return nil 61 } 62 time.Sleep(10 * time.Millisecond) 63 } 64 return err 65 } 66 67 func (s) TestCZServerRegistrationAndDeletion(t *testing.T) { 68 testcases := []struct { 69 total int 70 start int64 71 max int64 72 length int64 73 end bool 74 }{ 75 {total: int(channelz.EntryPerPage), start: 0, max: 0, length: channelz.EntryPerPage, end: true}, 76 {total: int(channelz.EntryPerPage) - 1, start: 0, max: 0, length: channelz.EntryPerPage - 1, end: true}, 77 {total: int(channelz.EntryPerPage) + 1, start: 0, max: 0, length: channelz.EntryPerPage, end: false}, 78 {total: int(channelz.EntryPerPage) + 1, start: int64(2*(channelz.EntryPerPage+1) + 1), max: 0, length: 0, end: true}, 79 {total: int(channelz.EntryPerPage), start: 0, max: 1, length: 1, end: false}, 80 {total: int(channelz.EntryPerPage), start: 0, max: channelz.EntryPerPage - 1, length: channelz.EntryPerPage - 1, end: false}, 81 } 82 83 for i, c := range testcases { 84 // Reset channelz IDs so `start` is valid. 85 channelz.IDGen.Reset() 86 87 e := tcpClearRREnv 88 te := newTest(t, e) 89 te.startServers(&testServer{security: e.security}, c.total) 90 91 ss, end := channelz.GetServers(c.start, c.max) 92 if int64(len(ss)) != c.length || end != c.end { 93 t.Fatalf("%d: GetServers(%d) = %+v (len of which: %d), end: %+v, want len(GetServers(%d)) = %d, end: %+v", i, c.start, ss, len(ss), end, c.start, c.length, c.end) 94 } 95 te.tearDown() 96 ss, end = channelz.GetServers(c.start, c.max) 97 if len(ss) != 0 || !end { 98 t.Fatalf("%d: GetServers(0) = %+v (len of which: %d), end: %+v, want len(GetServers(0)) = 0, end: true", i, ss, len(ss), end) 99 } 100 } 101 } 102 103 func (s) TestCZGetServer(t *testing.T) { 104 e := tcpClearRREnv 105 te := newTest(t, e) 106 te.startServer(&testServer{security: e.security}) 107 defer te.tearDown() 108 109 ss, _ := channelz.GetServers(0, 0) 110 if len(ss) != 1 { 111 t.Fatalf("there should only be one server, not %d", len(ss)) 112 } 113 114 serverID := ss[0].ID 115 srv := channelz.GetServer(serverID) 116 if srv == nil { 117 t.Fatalf("server %d does not exist", serverID) 118 } 119 if srv.ID != serverID { 120 t.Fatalf("server want id %d, but got %d", serverID, srv.ID) 121 } 122 123 te.tearDown() 124 125 if err := verifyResultWithDelay(func() (bool, error) { 126 srv := channelz.GetServer(serverID) 127 if srv != nil { 128 return false, fmt.Errorf("server %d should not exist", serverID) 129 } 130 131 return true, nil 132 }); err != nil { 133 t.Fatal(err) 134 } 135 } 136 137 func (s) TestCZTopChannelRegistrationAndDeletion(t *testing.T) { 138 testcases := []struct { 139 total int 140 start int64 141 max int64 142 length int64 143 end bool 144 }{ 145 {total: int(channelz.EntryPerPage), start: 0, max: 0, length: channelz.EntryPerPage, end: true}, 146 {total: int(channelz.EntryPerPage) - 1, start: 0, max: 0, length: channelz.EntryPerPage - 1, end: true}, 147 {total: int(channelz.EntryPerPage) + 1, start: 0, max: 0, length: channelz.EntryPerPage, end: false}, 148 {total: int(channelz.EntryPerPage) + 1, start: int64(2*(channelz.EntryPerPage+1) + 1), max: 0, length: 0, end: true}, 149 {total: int(channelz.EntryPerPage), start: 0, max: 1, length: 1, end: false}, 150 {total: int(channelz.EntryPerPage), start: 0, max: channelz.EntryPerPage - 1, length: channelz.EntryPerPage - 1, end: false}, 151 } 152 153 for _, c := range testcases { 154 // Reset channelz IDs so `start` is valid. 155 channelz.IDGen.Reset() 156 157 e := tcpClearRREnv 158 te := newTest(t, e) 159 var ccs []*grpc.ClientConn 160 for i := 0; i < c.total; i++ { 161 cc := te.clientConn() 162 te.cc = nil 163 // avoid making next dial blocking 164 te.srvAddr = "" 165 ccs = append(ccs, cc) 166 } 167 if err := verifyResultWithDelay(func() (bool, error) { 168 if tcs, end := channelz.GetTopChannels(c.start, c.max); int64(len(tcs)) != c.length || end != c.end { 169 return false, fmt.Errorf("getTopChannels(%d) = %+v (len of which: %d), end: %+v, want len(GetTopChannels(%d)) = %d, end: %+v", c.start, tcs, len(tcs), end, c.start, c.length, c.end) 170 } 171 return true, nil 172 }); err != nil { 173 t.Fatal(err) 174 } 175 176 for _, cc := range ccs { 177 cc.Close() 178 } 179 180 if err := verifyResultWithDelay(func() (bool, error) { 181 if tcs, end := channelz.GetTopChannels(c.start, c.max); len(tcs) != 0 || !end { 182 return false, fmt.Errorf("getTopChannels(0) = %+v (len of which: %d), end: %+v, want len(GetTopChannels(0)) = 0, end: true", tcs, len(tcs), end) 183 } 184 return true, nil 185 }); err != nil { 186 t.Fatal(err) 187 } 188 te.tearDown() 189 } 190 } 191 192 func (s) TestCZTopChannelRegistrationAndDeletionWhenDialFail(t *testing.T) { 193 // Make dial fails (due to no transport security specified) 194 _, err := grpc.Dial("fake.addr") 195 if err == nil { 196 t.Fatal("expecting dial to fail") 197 } 198 if tcs, end := channelz.GetTopChannels(0, 0); tcs != nil || !end { 199 t.Fatalf("GetTopChannels(0, 0) = %v, %v, want <nil>, true", tcs, end) 200 } 201 } 202 203 func (s) TestCZNestedChannelRegistrationAndDeletion(t *testing.T) { 204 e := tcpClearRREnv 205 // avoid calling API to set balancer type, which will void service config's change of balancer. 206 e.balancer = "" 207 te := newTest(t, e) 208 r := manual.NewBuilderWithScheme("whatever") 209 te.resolverScheme = r.Scheme() 210 te.clientConn(grpc.WithResolvers(r)) 211 resolvedAddrs := []resolver.Address{{Addr: "127.0.0.1:0", ServerName: "grpclb.server"}} 212 grpclbConfig := parseServiceConfig(t, r, `{"loadBalancingPolicy": "grpclb"}`) 213 r.UpdateState(grpclbstate.Set(resolver.State{ServiceConfig: grpclbConfig}, &grpclbstate.State{BalancerAddresses: resolvedAddrs})) 214 defer te.tearDown() 215 216 if err := verifyResultWithDelay(func() (bool, error) { 217 tcs, _ := channelz.GetTopChannels(0, 0) 218 if len(tcs) != 1 { 219 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 220 } 221 if len(tcs[0].NestedChans) != 1 { 222 return false, fmt.Errorf("there should be one nested channel from grpclb, not %d", len(tcs[0].NestedChans)) 223 } 224 return true, nil 225 }); err != nil { 226 t.Fatal(err) 227 } 228 229 r.UpdateState(resolver.State{ 230 Addresses: []resolver.Address{{Addr: "127.0.0.1:0"}}, 231 ServiceConfig: parseServiceConfig(t, r, `{"loadBalancingPolicy": "round_robin"}`), 232 }) 233 234 // wait for the shutdown of grpclb balancer 235 if err := verifyResultWithDelay(func() (bool, error) { 236 tcs, _ := channelz.GetTopChannels(0, 0) 237 if len(tcs) != 1 { 238 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 239 } 240 if len(tcs[0].NestedChans) != 0 { 241 return false, fmt.Errorf("there should be 0 nested channel from grpclb, not %d", len(tcs[0].NestedChans)) 242 } 243 return true, nil 244 }); err != nil { 245 t.Fatal(err) 246 } 247 } 248 249 func (s) TestCZClientSubChannelSocketRegistrationAndDeletion(t *testing.T) { 250 e := tcpClearRREnv 251 num := 3 // number of backends 252 te := newTest(t, e) 253 var svrAddrs []resolver.Address 254 te.startServers(&testServer{security: e.security}, num) 255 r := manual.NewBuilderWithScheme("whatever") 256 for _, a := range te.srvAddrs { 257 svrAddrs = append(svrAddrs, resolver.Address{Addr: a}) 258 } 259 r.InitialState(resolver.State{Addresses: svrAddrs}) 260 te.resolverScheme = r.Scheme() 261 te.clientConn(grpc.WithResolvers(r)) 262 defer te.tearDown() 263 // Here, we just wait for all sockets to be up. In the future, if we implement 264 // IDLE, we may need to make several rpc calls to create the sockets. 265 if err := verifyResultWithDelay(func() (bool, error) { 266 tcs, _ := channelz.GetTopChannels(0, 0) 267 if len(tcs) != 1 { 268 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 269 } 270 if len(tcs[0].SubChans) != num { 271 return false, fmt.Errorf("there should be %d subchannel not %d", num, len(tcs[0].SubChans)) 272 } 273 count := 0 274 for k := range tcs[0].SubChans { 275 sc := channelz.GetSubChannel(k) 276 if sc == nil { 277 return false, fmt.Errorf("got <nil> subchannel") 278 } 279 count += len(sc.Sockets) 280 } 281 if count != num { 282 return false, fmt.Errorf("there should be %d sockets not %d", num, count) 283 } 284 285 return true, nil 286 }); err != nil { 287 t.Fatal(err) 288 } 289 290 r.UpdateState(resolver.State{Addresses: svrAddrs[:len(svrAddrs)-1]}) 291 292 if err := verifyResultWithDelay(func() (bool, error) { 293 tcs, _ := channelz.GetTopChannels(0, 0) 294 if len(tcs) != 1 { 295 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 296 } 297 if len(tcs[0].SubChans) != num-1 { 298 return false, fmt.Errorf("there should be %d subchannel not %d", num-1, len(tcs[0].SubChans)) 299 } 300 count := 0 301 for k := range tcs[0].SubChans { 302 sc := channelz.GetSubChannel(k) 303 if sc == nil { 304 return false, fmt.Errorf("got <nil> subchannel") 305 } 306 count += len(sc.Sockets) 307 } 308 if count != num-1 { 309 return false, fmt.Errorf("there should be %d sockets not %d", num-1, count) 310 } 311 312 return true, nil 313 }); err != nil { 314 t.Fatal(err) 315 } 316 } 317 318 func (s) TestCZServerSocketRegistrationAndDeletion(t *testing.T) { 319 testcases := []struct { 320 total int 321 start int64 322 max int64 323 length int64 324 end bool 325 }{ 326 {total: int(channelz.EntryPerPage), start: 0, max: 0, length: channelz.EntryPerPage, end: true}, 327 {total: int(channelz.EntryPerPage) - 1, start: 0, max: 0, length: channelz.EntryPerPage - 1, end: true}, 328 {total: int(channelz.EntryPerPage) + 1, start: 0, max: 0, length: channelz.EntryPerPage, end: false}, 329 {total: int(channelz.EntryPerPage), start: 1, max: 0, length: channelz.EntryPerPage - 1, end: true}, 330 {total: int(channelz.EntryPerPage) + 1, start: channelz.EntryPerPage + 1, max: 0, length: 0, end: true}, 331 {total: int(channelz.EntryPerPage), start: 0, max: 1, length: 1, end: false}, 332 {total: int(channelz.EntryPerPage), start: 0, max: channelz.EntryPerPage - 1, length: channelz.EntryPerPage - 1, end: false}, 333 } 334 335 for _, c := range testcases { 336 // Reset channelz IDs so `start` is valid. 337 channelz.IDGen.Reset() 338 339 e := tcpClearRREnv 340 te := newTest(t, e) 341 te.startServer(&testServer{security: e.security}) 342 var ccs []*grpc.ClientConn 343 for i := 0; i < c.total; i++ { 344 cc := te.clientConn() 345 te.cc = nil 346 ccs = append(ccs, cc) 347 } 348 349 var svrID int64 350 if err := verifyResultWithDelay(func() (bool, error) { 351 ss, _ := channelz.GetServers(0, 0) 352 if len(ss) != 1 { 353 return false, fmt.Errorf("there should only be one server, not %d", len(ss)) 354 } 355 if len(ss[0].ListenSockets) != 1 { 356 return false, fmt.Errorf("there should only be one server listen socket, not %d", len(ss[0].ListenSockets)) 357 } 358 359 startID := c.start 360 if startID != 0 { 361 ns, _ := channelz.GetServerSockets(ss[0].ID, 0, int64(c.total)) 362 if int64(len(ns)) < c.start { 363 return false, fmt.Errorf("there should more than %d sockets, not %d", len(ns), c.start) 364 } 365 startID = ns[c.start-1].ID + 1 366 } 367 368 ns, end := channelz.GetServerSockets(ss[0].ID, startID, c.max) 369 if int64(len(ns)) != c.length || end != c.end { 370 return false, fmt.Errorf("GetServerSockets(%d) = %+v (len of which: %d), end: %+v, want len(GetServerSockets(%d)) = %d, end: %+v", c.start, ns, len(ns), end, c.start, c.length, c.end) 371 } 372 373 svrID = ss[0].ID 374 return true, nil 375 }); err != nil { 376 t.Fatal(err) 377 } 378 379 for _, cc := range ccs { 380 cc.Close() 381 } 382 383 if err := verifyResultWithDelay(func() (bool, error) { 384 ns, _ := channelz.GetServerSockets(svrID, c.start, c.max) 385 if len(ns) != 0 { 386 return false, fmt.Errorf("there should be %d normal sockets not %d", 0, len(ns)) 387 } 388 return true, nil 389 }); err != nil { 390 t.Fatal(err) 391 } 392 te.tearDown() 393 } 394 } 395 396 func (s) TestCZServerListenSocketDeletion(t *testing.T) { 397 s := grpc.NewServer() 398 lis, err := net.Listen("tcp", "localhost:0") 399 if err != nil { 400 t.Fatalf("failed to listen: %v", err) 401 } 402 go s.Serve(lis) 403 if err := verifyResultWithDelay(func() (bool, error) { 404 ss, _ := channelz.GetServers(0, 0) 405 if len(ss) != 1 { 406 return false, fmt.Errorf("there should only be one server, not %d", len(ss)) 407 } 408 if len(ss[0].ListenSockets) != 1 { 409 return false, fmt.Errorf("there should only be one server listen socket, not %d", len(ss[0].ListenSockets)) 410 } 411 return true, nil 412 }); err != nil { 413 t.Fatal(err) 414 } 415 416 lis.Close() 417 if err := verifyResultWithDelay(func() (bool, error) { 418 ss, _ := channelz.GetServers(0, 0) 419 if len(ss) != 1 { 420 return false, fmt.Errorf("there should be 1 server, not %d", len(ss)) 421 } 422 if len(ss[0].ListenSockets) != 0 { 423 return false, fmt.Errorf("there should only be %d server listen socket, not %d", 0, len(ss[0].ListenSockets)) 424 } 425 return true, nil 426 }); err != nil { 427 t.Fatal(err) 428 } 429 s.Stop() 430 } 431 432 type dummyChannel struct{} 433 434 func (d *dummyChannel) ChannelzMetric() *channelz.ChannelInternalMetric { 435 return &channelz.ChannelInternalMetric{} 436 } 437 438 type dummySocket struct{} 439 440 func (d *dummySocket) ChannelzMetric() *channelz.SocketInternalMetric { 441 return &channelz.SocketInternalMetric{} 442 } 443 444 func (s) TestCZRecusivelyDeletionOfEntry(t *testing.T) { 445 // +--+TopChan+---+ 446 // | | 447 // v v 448 // +-+SubChan1+--+ SubChan2 449 // | | 450 // v v 451 // Socket1 Socket2 452 453 topChanID := channelz.RegisterChannel(&dummyChannel{}, nil, "") 454 subChanID1, _ := channelz.RegisterSubChannel(&dummyChannel{}, topChanID, "") 455 subChanID2, _ := channelz.RegisterSubChannel(&dummyChannel{}, topChanID, "") 456 sktID1, _ := channelz.RegisterNormalSocket(&dummySocket{}, subChanID1, "") 457 sktID2, _ := channelz.RegisterNormalSocket(&dummySocket{}, subChanID1, "") 458 459 tcs, _ := channelz.GetTopChannels(0, 0) 460 if tcs == nil || len(tcs) != 1 { 461 t.Fatalf("There should be one TopChannel entry") 462 } 463 if len(tcs[0].SubChans) != 2 { 464 t.Fatalf("There should be two SubChannel entries") 465 } 466 sc := channelz.GetSubChannel(subChanID1.Int()) 467 if sc == nil || len(sc.Sockets) != 2 { 468 t.Fatalf("There should be two Socket entries") 469 } 470 471 channelz.RemoveEntry(topChanID) 472 tcs, _ = channelz.GetTopChannels(0, 0) 473 if tcs == nil || len(tcs) != 1 { 474 t.Fatalf("There should be one TopChannel entry") 475 } 476 477 channelz.RemoveEntry(subChanID1) 478 channelz.RemoveEntry(subChanID2) 479 tcs, _ = channelz.GetTopChannels(0, 0) 480 if tcs == nil || len(tcs) != 1 { 481 t.Fatalf("There should be one TopChannel entry") 482 } 483 if len(tcs[0].SubChans) != 1 { 484 t.Fatalf("There should be one SubChannel entry") 485 } 486 487 channelz.RemoveEntry(sktID1) 488 channelz.RemoveEntry(sktID2) 489 tcs, _ = channelz.GetTopChannels(0, 0) 490 if tcs != nil { 491 t.Fatalf("There should be no TopChannel entry") 492 } 493 } 494 495 func (s) TestCZChannelMetrics(t *testing.T) { 496 e := tcpClearRREnv 497 num := 3 // number of backends 498 te := newTest(t, e) 499 te.maxClientSendMsgSize = newInt(8) 500 var svrAddrs []resolver.Address 501 te.startServers(&testServer{security: e.security}, num) 502 r := manual.NewBuilderWithScheme("whatever") 503 for _, a := range te.srvAddrs { 504 svrAddrs = append(svrAddrs, resolver.Address{Addr: a}) 505 } 506 r.InitialState(resolver.State{Addresses: svrAddrs}) 507 te.resolverScheme = r.Scheme() 508 cc := te.clientConn(grpc.WithResolvers(r)) 509 defer te.tearDown() 510 tc := testgrpc.NewTestServiceClient(cc) 511 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 512 defer cancel() 513 if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil { 514 t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err) 515 } 516 517 const smallSize = 1 518 const largeSize = 8 519 520 largePayload, err := newPayload(testpb.PayloadType_COMPRESSABLE, largeSize) 521 if err != nil { 522 t.Fatal(err) 523 } 524 req := &testpb.SimpleRequest{ 525 ResponseType: testpb.PayloadType_COMPRESSABLE, 526 ResponseSize: int32(smallSize), 527 Payload: largePayload, 528 } 529 530 if _, err := tc.UnaryCall(ctx, req); err == nil || status.Code(err) != codes.ResourceExhausted { 531 t.Fatalf("TestService/UnaryCall(_, _) = _, %v, want _, error code: %s", err, codes.ResourceExhausted) 532 } 533 534 stream, err := tc.FullDuplexCall(ctx) 535 if err != nil { 536 t.Fatalf("%v.FullDuplexCall(_) = _, %v, want <nil>", tc, err) 537 } 538 defer stream.CloseSend() 539 // Here, we just wait for all sockets to be up. In the future, if we implement 540 // IDLE, we may need to make several rpc calls to create the sockets. 541 if err := verifyResultWithDelay(func() (bool, error) { 542 tcs, _ := channelz.GetTopChannels(0, 0) 543 if len(tcs) != 1 { 544 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 545 } 546 if len(tcs[0].SubChans) != num { 547 return false, fmt.Errorf("there should be %d subchannel not %d", num, len(tcs[0].SubChans)) 548 } 549 var cst, csu, cf int64 550 for k := range tcs[0].SubChans { 551 sc := channelz.GetSubChannel(k) 552 if sc == nil { 553 return false, fmt.Errorf("got <nil> subchannel") 554 } 555 cst += sc.ChannelData.CallsStarted 556 csu += sc.ChannelData.CallsSucceeded 557 cf += sc.ChannelData.CallsFailed 558 } 559 if cst != 3 { 560 return false, fmt.Errorf("there should be 3 CallsStarted not %d", cst) 561 } 562 if csu != 1 { 563 return false, fmt.Errorf("there should be 1 CallsSucceeded not %d", csu) 564 } 565 if cf != 1 { 566 return false, fmt.Errorf("there should be 1 CallsFailed not %d", cf) 567 } 568 if tcs[0].ChannelData.CallsStarted != 3 { 569 return false, fmt.Errorf("there should be 3 CallsStarted not %d", tcs[0].ChannelData.CallsStarted) 570 } 571 if tcs[0].ChannelData.CallsSucceeded != 1 { 572 return false, fmt.Errorf("there should be 1 CallsSucceeded not %d", tcs[0].ChannelData.CallsSucceeded) 573 } 574 if tcs[0].ChannelData.CallsFailed != 1 { 575 return false, fmt.Errorf("there should be 1 CallsFailed not %d", tcs[0].ChannelData.CallsFailed) 576 } 577 return true, nil 578 }); err != nil { 579 t.Fatal(err) 580 } 581 } 582 583 func (s) TestCZServerMetrics(t *testing.T) { 584 e := tcpClearRREnv 585 te := newTest(t, e) 586 te.maxServerReceiveMsgSize = newInt(8) 587 te.startServer(&testServer{security: e.security}) 588 defer te.tearDown() 589 cc := te.clientConn() 590 tc := testgrpc.NewTestServiceClient(cc) 591 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 592 defer cancel() 593 if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil { 594 t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err) 595 } 596 597 const smallSize = 1 598 const largeSize = 8 599 600 largePayload, err := newPayload(testpb.PayloadType_COMPRESSABLE, largeSize) 601 if err != nil { 602 t.Fatal(err) 603 } 604 req := &testpb.SimpleRequest{ 605 ResponseType: testpb.PayloadType_COMPRESSABLE, 606 ResponseSize: int32(smallSize), 607 Payload: largePayload, 608 } 609 if _, err := tc.UnaryCall(ctx, req); err == nil || status.Code(err) != codes.ResourceExhausted { 610 t.Fatalf("TestService/UnaryCall(_, _) = _, %v, want _, error code: %s", err, codes.ResourceExhausted) 611 } 612 613 stream, err := tc.FullDuplexCall(ctx) 614 if err != nil { 615 t.Fatalf("%v.FullDuplexCall(_) = _, %v, want <nil>", tc, err) 616 } 617 defer stream.CloseSend() 618 619 if err := verifyResultWithDelay(func() (bool, error) { 620 ss, _ := channelz.GetServers(0, 0) 621 if len(ss) != 1 { 622 return false, fmt.Errorf("there should only be one server, not %d", len(ss)) 623 } 624 if ss[0].ServerData.CallsStarted != 3 { 625 return false, fmt.Errorf("there should be 3 CallsStarted not %d", ss[0].ServerData.CallsStarted) 626 } 627 if ss[0].ServerData.CallsSucceeded != 1 { 628 return false, fmt.Errorf("there should be 1 CallsSucceeded not %d", ss[0].ServerData.CallsSucceeded) 629 } 630 if ss[0].ServerData.CallsFailed != 1 { 631 return false, fmt.Errorf("there should be 1 CallsFailed not %d", ss[0].ServerData.CallsFailed) 632 } 633 return true, nil 634 }); err != nil { 635 t.Fatal(err) 636 } 637 } 638 639 type testServiceClientWrapper struct { 640 testgrpc.TestServiceClient 641 mu sync.RWMutex 642 streamsCreated int 643 } 644 645 func (t *testServiceClientWrapper) getCurrentStreamID() uint32 { 646 t.mu.RLock() 647 defer t.mu.RUnlock() 648 return uint32(2*t.streamsCreated - 1) 649 } 650 651 func (t *testServiceClientWrapper) EmptyCall(ctx context.Context, in *testpb.Empty, opts ...grpc.CallOption) (*testpb.Empty, error) { 652 t.mu.Lock() 653 defer t.mu.Unlock() 654 t.streamsCreated++ 655 return t.TestServiceClient.EmptyCall(ctx, in, opts...) 656 } 657 658 func (t *testServiceClientWrapper) UnaryCall(ctx context.Context, in *testpb.SimpleRequest, opts ...grpc.CallOption) (*testpb.SimpleResponse, error) { 659 t.mu.Lock() 660 defer t.mu.Unlock() 661 t.streamsCreated++ 662 return t.TestServiceClient.UnaryCall(ctx, in, opts...) 663 } 664 665 func (t *testServiceClientWrapper) StreamingOutputCall(ctx context.Context, in *testpb.StreamingOutputCallRequest, opts ...grpc.CallOption) (testgrpc.TestService_StreamingOutputCallClient, error) { 666 t.mu.Lock() 667 defer t.mu.Unlock() 668 t.streamsCreated++ 669 return t.TestServiceClient.StreamingOutputCall(ctx, in, opts...) 670 } 671 672 func (t *testServiceClientWrapper) StreamingInputCall(ctx context.Context, opts ...grpc.CallOption) (testgrpc.TestService_StreamingInputCallClient, error) { 673 t.mu.Lock() 674 defer t.mu.Unlock() 675 t.streamsCreated++ 676 return t.TestServiceClient.StreamingInputCall(ctx, opts...) 677 } 678 679 func (t *testServiceClientWrapper) FullDuplexCall(ctx context.Context, opts ...grpc.CallOption) (testgrpc.TestService_FullDuplexCallClient, error) { 680 t.mu.Lock() 681 defer t.mu.Unlock() 682 t.streamsCreated++ 683 return t.TestServiceClient.FullDuplexCall(ctx, opts...) 684 } 685 686 func (t *testServiceClientWrapper) HalfDuplexCall(ctx context.Context, opts ...grpc.CallOption) (testgrpc.TestService_HalfDuplexCallClient, error) { 687 t.mu.Lock() 688 defer t.mu.Unlock() 689 t.streamsCreated++ 690 return t.TestServiceClient.HalfDuplexCall(ctx, opts...) 691 } 692 693 func doSuccessfulUnaryCall(tc testgrpc.TestServiceClient, t *testing.T) { 694 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 695 defer cancel() 696 if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil { 697 t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err) 698 } 699 } 700 701 func doStreamingInputCallWithLargePayload(tc testgrpc.TestServiceClient, t *testing.T) { 702 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 703 defer cancel() 704 s, err := tc.StreamingInputCall(ctx) 705 if err != nil { 706 t.Fatalf("TestService/StreamingInputCall(_) = _, %v, want <nil>", err) 707 } 708 payload, err := newPayload(testpb.PayloadType_COMPRESSABLE, 10000) 709 if err != nil { 710 t.Fatal(err) 711 } 712 s.Send(&testpb.StreamingInputCallRequest{Payload: payload}) 713 } 714 715 func doServerSideFailedUnaryCall(tc testgrpc.TestServiceClient, t *testing.T) { 716 const smallSize = 1 717 const largeSize = 2000 718 719 largePayload, err := newPayload(testpb.PayloadType_COMPRESSABLE, largeSize) 720 if err != nil { 721 t.Fatal(err) 722 } 723 req := &testpb.SimpleRequest{ 724 ResponseType: testpb.PayloadType_COMPRESSABLE, 725 ResponseSize: int32(smallSize), 726 Payload: largePayload, 727 } 728 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 729 defer cancel() 730 if _, err := tc.UnaryCall(ctx, req); err == nil || status.Code(err) != codes.ResourceExhausted { 731 t.Fatalf("TestService/UnaryCall(_, _) = _, %v, want _, error code: %s", err, codes.ResourceExhausted) 732 } 733 } 734 735 func doClientSideInitiatedFailedStream(tc testgrpc.TestServiceClient, t *testing.T) { 736 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 737 stream, err := tc.FullDuplexCall(ctx) 738 if err != nil { 739 t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err) 740 } 741 742 const smallSize = 1 743 smallPayload, err := newPayload(testpb.PayloadType_COMPRESSABLE, smallSize) 744 if err != nil { 745 t.Fatal(err) 746 } 747 748 sreq := &testpb.StreamingOutputCallRequest{ 749 ResponseType: testpb.PayloadType_COMPRESSABLE, 750 ResponseParameters: []*testpb.ResponseParameters{ 751 {Size: smallSize}, 752 }, 753 Payload: smallPayload, 754 } 755 756 if err := stream.Send(sreq); err != nil { 757 t.Fatalf("%v.Send(%v) = %v, want <nil>", stream, sreq, err) 758 } 759 if _, err := stream.Recv(); err != nil { 760 t.Fatalf("%v.Recv() = %v, want <nil>", stream, err) 761 } 762 // By canceling the call, the client will send rst_stream to end the call, and 763 // the stream will failed as a result. 764 cancel() 765 } 766 767 // This func is to be used to test client side counting of failed streams. 768 func doServerSideInitiatedFailedStreamWithRSTStream(tc testgrpc.TestServiceClient, t *testing.T, l *listenerWrapper) { 769 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 770 defer cancel() 771 stream, err := tc.FullDuplexCall(ctx) 772 if err != nil { 773 t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err) 774 } 775 776 const smallSize = 1 777 smallPayload, err := newPayload(testpb.PayloadType_COMPRESSABLE, smallSize) 778 if err != nil { 779 t.Fatal(err) 780 } 781 782 sreq := &testpb.StreamingOutputCallRequest{ 783 ResponseType: testpb.PayloadType_COMPRESSABLE, 784 ResponseParameters: []*testpb.ResponseParameters{ 785 {Size: smallSize}, 786 }, 787 Payload: smallPayload, 788 } 789 790 if err := stream.Send(sreq); err != nil { 791 t.Fatalf("%v.Send(%v) = %v, want <nil>", stream, sreq, err) 792 } 793 if _, err := stream.Recv(); err != nil { 794 t.Fatalf("%v.Recv() = %v, want <nil>", stream, err) 795 } 796 797 rcw := l.getLastConn() 798 799 if rcw != nil { 800 rcw.writeRSTStream(tc.(*testServiceClientWrapper).getCurrentStreamID(), http2.ErrCodeCancel) 801 } 802 if _, err := stream.Recv(); err == nil { 803 t.Fatalf("%v.Recv() = %v, want <non-nil>", stream, err) 804 } 805 } 806 807 // this func is to be used to test client side counting of failed streams. 808 func doServerSideInitiatedFailedStreamWithGoAway(ctx context.Context, tc testgrpc.TestServiceClient, t *testing.T, l *listenerWrapper) { 809 // This call is just to keep the transport from shutting down (socket will be deleted 810 // in this case, and we will not be able to get metrics). 811 s, err := tc.FullDuplexCall(ctx) 812 if err != nil { 813 t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err) 814 } 815 if err := s.Send(&testpb.StreamingOutputCallRequest{ResponseParameters: []*testpb.ResponseParameters{ 816 { 817 Size: 1, 818 }, 819 }}); err != nil { 820 t.Fatalf("s.Send() failed with error: %v", err) 821 } 822 if _, err := s.Recv(); err != nil { 823 t.Fatalf("s.Recv() failed with error: %v", err) 824 } 825 826 s, err = tc.FullDuplexCall(ctx) 827 if err != nil { 828 t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err) 829 } 830 if err := s.Send(&testpb.StreamingOutputCallRequest{ResponseParameters: []*testpb.ResponseParameters{ 831 { 832 Size: 1, 833 }, 834 }}); err != nil { 835 t.Fatalf("s.Send() failed with error: %v", err) 836 } 837 if _, err := s.Recv(); err != nil { 838 t.Fatalf("s.Recv() failed with error: %v", err) 839 } 840 841 rcw := l.getLastConn() 842 if rcw != nil { 843 rcw.writeGoAway(tc.(*testServiceClientWrapper).getCurrentStreamID()-2, http2.ErrCodeCancel, []byte{}) 844 } 845 if _, err := s.Recv(); err == nil { 846 t.Fatalf("%v.Recv() = %v, want <non-nil>", s, err) 847 } 848 } 849 850 func (s) TestCZClientSocketMetricsStreamsAndMessagesCount(t *testing.T) { 851 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 852 defer cancel() 853 854 e := tcpClearRREnv 855 te := newTest(t, e) 856 te.maxServerReceiveMsgSize = newInt(20) 857 te.maxClientReceiveMsgSize = newInt(20) 858 rcw := te.startServerWithConnControl(&testServer{security: e.security}) 859 defer te.tearDown() 860 cc := te.clientConn() 861 tc := &testServiceClientWrapper{TestServiceClient: testgrpc.NewTestServiceClient(cc)} 862 863 doSuccessfulUnaryCall(tc, t) 864 var scID, skID int64 865 if err := verifyResultWithDelay(func() (bool, error) { 866 tchan, _ := channelz.GetTopChannels(0, 0) 867 if len(tchan) != 1 { 868 return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan)) 869 } 870 if len(tchan[0].SubChans) != 1 { 871 return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans)) 872 } 873 874 for scID = range tchan[0].SubChans { 875 break 876 } 877 sc := channelz.GetSubChannel(scID) 878 if sc == nil { 879 return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", scID) 880 } 881 if len(sc.Sockets) != 1 { 882 return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets)) 883 } 884 for skID = range sc.Sockets { 885 break 886 } 887 skt := channelz.GetSocket(skID) 888 sktData := skt.SocketData 889 if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 1 || sktData.MessagesSent != 1 || sktData.MessagesReceived != 1 { 890 return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, MessagesSent, MessagesReceived) = (1, 1, 1, 1), got (%d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.MessagesSent, sktData.MessagesReceived) 891 } 892 return true, nil 893 }); err != nil { 894 t.Fatal(err) 895 } 896 897 doServerSideFailedUnaryCall(tc, t) 898 if err := verifyResultWithDelay(func() (bool, error) { 899 skt := channelz.GetSocket(skID) 900 sktData := skt.SocketData 901 if sktData.StreamsStarted != 2 || sktData.StreamsSucceeded != 2 || sktData.MessagesSent != 2 || sktData.MessagesReceived != 1 { 902 return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, MessagesSent, MessagesReceived) = (2, 2, 2, 1), got (%d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.MessagesSent, sktData.MessagesReceived) 903 } 904 return true, nil 905 }); err != nil { 906 t.Fatal(err) 907 } 908 909 doClientSideInitiatedFailedStream(tc, t) 910 if err := verifyResultWithDelay(func() (bool, error) { 911 skt := channelz.GetSocket(skID) 912 sktData := skt.SocketData 913 if sktData.StreamsStarted != 3 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 1 || sktData.MessagesSent != 3 || sktData.MessagesReceived != 2 { 914 return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (3, 2, 1, 3, 2), got (%d, %d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived) 915 } 916 return true, nil 917 }); err != nil { 918 t.Fatal(err) 919 } 920 921 doServerSideInitiatedFailedStreamWithRSTStream(tc, t, rcw) 922 if err := verifyResultWithDelay(func() (bool, error) { 923 skt := channelz.GetSocket(skID) 924 sktData := skt.SocketData 925 if sktData.StreamsStarted != 4 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 2 || sktData.MessagesSent != 4 || sktData.MessagesReceived != 3 { 926 return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (4, 2, 2, 4, 3), got (%d, %d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived) 927 } 928 return true, nil 929 }); err != nil { 930 t.Fatal(err) 931 } 932 933 doServerSideInitiatedFailedStreamWithGoAway(ctx, tc, t, rcw) 934 if err := verifyResultWithDelay(func() (bool, error) { 935 skt := channelz.GetSocket(skID) 936 sktData := skt.SocketData 937 if sktData.StreamsStarted != 6 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 3 || sktData.MessagesSent != 6 || sktData.MessagesReceived != 5 { 938 return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (6, 2, 3, 6, 5), got (%d, %d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived) 939 } 940 return true, nil 941 }); err != nil { 942 t.Fatal(err) 943 } 944 } 945 946 // This test is to complete TestCZClientSocketMetricsStreamsAndMessagesCount and 947 // TestCZServerSocketMetricsStreamsAndMessagesCount by adding the test case of 948 // server sending RST_STREAM to client due to client side flow control violation. 949 // It is separated from other cases due to setup incompatibly, i.e. max receive 950 // size violation will mask flow control violation. 951 func (s) TestCZClientAndServerSocketMetricsStreamsCountFlowControlRSTStream(t *testing.T) { 952 e := tcpClearRREnv 953 te := newTest(t, e) 954 te.serverInitialWindowSize = 65536 955 // Avoid overflowing connection level flow control window, which will lead to 956 // transport being closed. 957 te.serverInitialConnWindowSize = 65536 * 2 958 ts := &stubserver.StubServer{FullDuplexCallF: func(stream testgrpc.TestService_FullDuplexCallServer) error { 959 stream.Send(&testpb.StreamingOutputCallResponse{}) 960 <-stream.Context().Done() 961 return status.Errorf(codes.DeadlineExceeded, "deadline exceeded or cancelled") 962 }} 963 te.startServer(ts) 964 defer te.tearDown() 965 cc, dw := te.clientConnWithConnControl() 966 tc := &testServiceClientWrapper{TestServiceClient: testgrpc.NewTestServiceClient(cc)} 967 968 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 969 stream, err := tc.FullDuplexCall(ctx) 970 if err != nil { 971 t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err) 972 } 973 if _, err := stream.Recv(); err != nil { 974 t.Fatalf("stream.Recv() = %v, want nil", err) 975 } 976 go func() { 977 payload := make([]byte, 16384) 978 for i := 0; i < 6; i++ { 979 dw.getRawConnWrapper().writeRawFrame(http2.FrameData, 0, tc.getCurrentStreamID(), payload) 980 } 981 }() 982 if _, err := stream.Recv(); status.Code(err) != codes.ResourceExhausted { 983 t.Fatalf("stream.Recv() = %v, want error code: %v", err, codes.ResourceExhausted) 984 } 985 cancel() 986 987 if err := verifyResultWithDelay(func() (bool, error) { 988 tchan, _ := channelz.GetTopChannels(0, 0) 989 if len(tchan) != 1 { 990 return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan)) 991 } 992 if len(tchan[0].SubChans) != 1 { 993 return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans)) 994 } 995 var id int64 996 for id = range tchan[0].SubChans { 997 break 998 } 999 sc := channelz.GetSubChannel(id) 1000 if sc == nil { 1001 return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id) 1002 } 1003 if len(sc.Sockets) != 1 { 1004 return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets)) 1005 } 1006 for id = range sc.Sockets { 1007 break 1008 } 1009 skt := channelz.GetSocket(id) 1010 sktData := skt.SocketData 1011 if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 0 || sktData.StreamsFailed != 1 { 1012 return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, StreamsFailed) = (1, 0, 1), got (%d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed) 1013 } 1014 ss, _ := channelz.GetServers(0, 0) 1015 if len(ss) != 1 { 1016 return false, fmt.Errorf("there should only be one server, not %d", len(ss)) 1017 } 1018 1019 ns, _ := channelz.GetServerSockets(ss[0].ID, 0, 0) 1020 if len(ns) != 1 { 1021 return false, fmt.Errorf("there should be one server normal socket, not %d", len(ns)) 1022 } 1023 sktData = ns[0].SocketData 1024 if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 0 || sktData.StreamsFailed != 1 { 1025 return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, StreamsFailed) = (1, 0, 1), got (%d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed) 1026 } 1027 return true, nil 1028 }); err != nil { 1029 t.Fatal(err) 1030 } 1031 } 1032 1033 func (s) TestCZClientAndServerSocketMetricsFlowControl(t *testing.T) { 1034 e := tcpClearRREnv 1035 te := newTest(t, e) 1036 // disable BDP 1037 te.serverInitialWindowSize = 65536 1038 te.serverInitialConnWindowSize = 65536 1039 te.clientInitialWindowSize = 65536 1040 te.clientInitialConnWindowSize = 65536 1041 te.startServer(&testServer{security: e.security}) 1042 defer te.tearDown() 1043 cc := te.clientConn() 1044 tc := testgrpc.NewTestServiceClient(cc) 1045 1046 for i := 0; i < 10; i++ { 1047 doSuccessfulUnaryCall(tc, t) 1048 } 1049 1050 var cliSktID, svrSktID int64 1051 if err := verifyResultWithDelay(func() (bool, error) { 1052 tchan, _ := channelz.GetTopChannels(0, 0) 1053 if len(tchan) != 1 { 1054 return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan)) 1055 } 1056 if len(tchan[0].SubChans) != 1 { 1057 return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans)) 1058 } 1059 var id int64 1060 for id = range tchan[0].SubChans { 1061 break 1062 } 1063 sc := channelz.GetSubChannel(id) 1064 if sc == nil { 1065 return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id) 1066 } 1067 if len(sc.Sockets) != 1 { 1068 return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets)) 1069 } 1070 for id = range sc.Sockets { 1071 break 1072 } 1073 skt := channelz.GetSocket(id) 1074 sktData := skt.SocketData 1075 // 65536 - 5 (Length-Prefixed-Message size) * 10 = 65486 1076 if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 65486 { 1077 return false, fmt.Errorf("client: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65536, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow) 1078 } 1079 ss, _ := channelz.GetServers(0, 0) 1080 if len(ss) != 1 { 1081 return false, fmt.Errorf("there should only be one server, not %d", len(ss)) 1082 } 1083 ns, _ := channelz.GetServerSockets(ss[0].ID, 0, 0) 1084 sktData = ns[0].SocketData 1085 if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 65486 { 1086 return false, fmt.Errorf("server: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65536, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow) 1087 } 1088 cliSktID, svrSktID = id, ss[0].ID 1089 return true, nil 1090 }); err != nil { 1091 t.Fatal(err) 1092 } 1093 1094 doStreamingInputCallWithLargePayload(tc, t) 1095 1096 if err := verifyResultWithDelay(func() (bool, error) { 1097 skt := channelz.GetSocket(cliSktID) 1098 sktData := skt.SocketData 1099 // Local: 65536 - 5 (Length-Prefixed-Message size) * 10 = 65486 1100 // Remote: 65536 - 5 (Length-Prefixed-Message size) * 10 - 10011 = 55475 1101 if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 55475 { 1102 return false, fmt.Errorf("client: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65486, 55475), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow) 1103 } 1104 ss, _ := channelz.GetServers(0, 0) 1105 if len(ss) != 1 { 1106 return false, fmt.Errorf("there should only be one server, not %d", len(ss)) 1107 } 1108 ns, _ := channelz.GetServerSockets(svrSktID, 0, 0) 1109 sktData = ns[0].SocketData 1110 if sktData.LocalFlowControlWindow != 55475 || sktData.RemoteFlowControlWindow != 65486 { 1111 return false, fmt.Errorf("server: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (55475, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow) 1112 } 1113 return true, nil 1114 }); err != nil { 1115 t.Fatal(err) 1116 } 1117 1118 // triggers transport flow control window update on server side, since unacked 1119 // bytes should be larger than limit now. i.e. 50 + 20022 > 65536/4. 1120 doStreamingInputCallWithLargePayload(tc, t) 1121 if err := verifyResultWithDelay(func() (bool, error) { 1122 skt := channelz.GetSocket(cliSktID) 1123 sktData := skt.SocketData 1124 // Local: 65536 - 5 (Length-Prefixed-Message size) * 10 = 65486 1125 // Remote: 65536 1126 if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 65536 { 1127 return false, fmt.Errorf("client: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65486, 65536), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow) 1128 } 1129 ss, _ := channelz.GetServers(0, 0) 1130 if len(ss) != 1 { 1131 return false, fmt.Errorf("there should only be one server, not %d", len(ss)) 1132 } 1133 ns, _ := channelz.GetServerSockets(svrSktID, 0, 0) 1134 sktData = ns[0].SocketData 1135 if sktData.LocalFlowControlWindow != 65536 || sktData.RemoteFlowControlWindow != 65486 { 1136 return false, fmt.Errorf("server: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65536, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow) 1137 } 1138 return true, nil 1139 }); err != nil { 1140 t.Fatal(err) 1141 } 1142 } 1143 1144 func (s) TestCZClientSocketMetricsKeepAlive(t *testing.T) { 1145 const keepaliveRate = 50 * time.Millisecond 1146 defer func(t time.Duration) { internal.KeepaliveMinPingTime = t }(internal.KeepaliveMinPingTime) 1147 internal.KeepaliveMinPingTime = keepaliveRate 1148 e := tcpClearRREnv 1149 te := newTest(t, e) 1150 te.customDialOptions = append(te.customDialOptions, grpc.WithKeepaliveParams( 1151 keepalive.ClientParameters{ 1152 Time: keepaliveRate, 1153 Timeout: 500 * time.Millisecond, 1154 PermitWithoutStream: true, 1155 })) 1156 te.customServerOptions = append(te.customServerOptions, grpc.KeepaliveEnforcementPolicy( 1157 keepalive.EnforcementPolicy{ 1158 MinTime: keepaliveRate, 1159 PermitWithoutStream: true, 1160 })) 1161 te.startServer(&testServer{security: e.security}) 1162 cc := te.clientConn() // Dial the server 1163 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1164 defer cancel() 1165 testutils.AwaitState(ctx, t, cc, connectivity.Ready) 1166 start := time.Now() 1167 // Wait for at least two keepalives to be able to occur. 1168 time.Sleep(2 * keepaliveRate) 1169 defer te.tearDown() 1170 if err := verifyResultWithDelay(func() (bool, error) { 1171 tchan, _ := channelz.GetTopChannels(0, 0) 1172 if len(tchan) != 1 { 1173 return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan)) 1174 } 1175 if len(tchan[0].SubChans) != 1 { 1176 return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans)) 1177 } 1178 var id int64 1179 for id = range tchan[0].SubChans { 1180 break 1181 } 1182 sc := channelz.GetSubChannel(id) 1183 if sc == nil { 1184 return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id) 1185 } 1186 if len(sc.Sockets) != 1 { 1187 return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets)) 1188 } 1189 for id = range sc.Sockets { 1190 break 1191 } 1192 skt := channelz.GetSocket(id) 1193 want := int64(time.Since(start) / keepaliveRate) 1194 if skt.SocketData.KeepAlivesSent != want { 1195 return false, fmt.Errorf("there should be %v KeepAlives sent, not %d", want, skt.SocketData.KeepAlivesSent) 1196 } 1197 return true, nil 1198 }); err != nil { 1199 t.Fatal(err) 1200 } 1201 } 1202 1203 func (s) TestCZServerSocketMetricsStreamsAndMessagesCount(t *testing.T) { 1204 e := tcpClearRREnv 1205 te := newTest(t, e) 1206 te.maxServerReceiveMsgSize = newInt(20) 1207 te.maxClientReceiveMsgSize = newInt(20) 1208 te.startServer(&testServer{security: e.security}) 1209 defer te.tearDown() 1210 cc, _ := te.clientConnWithConnControl() 1211 tc := &testServiceClientWrapper{TestServiceClient: testgrpc.NewTestServiceClient(cc)} 1212 1213 var svrID int64 1214 if err := verifyResultWithDelay(func() (bool, error) { 1215 ss, _ := channelz.GetServers(0, 0) 1216 if len(ss) != 1 { 1217 return false, fmt.Errorf("there should only be one server, not %d", len(ss)) 1218 } 1219 svrID = ss[0].ID 1220 return true, nil 1221 }); err != nil { 1222 t.Fatal(err) 1223 } 1224 1225 doSuccessfulUnaryCall(tc, t) 1226 if err := verifyResultWithDelay(func() (bool, error) { 1227 ns, _ := channelz.GetServerSockets(svrID, 0, 0) 1228 sktData := ns[0].SocketData 1229 if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 1 || sktData.StreamsFailed != 0 || sktData.MessagesSent != 1 || sktData.MessagesReceived != 1 { 1230 return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, MessagesSent, MessagesReceived) = (1, 1, 1, 1), got (%d, %d, %d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived) 1231 } 1232 return true, nil 1233 }); err != nil { 1234 t.Fatal(err) 1235 } 1236 1237 doServerSideFailedUnaryCall(tc, t) 1238 if err := verifyResultWithDelay(func() (bool, error) { 1239 ns, _ := channelz.GetServerSockets(svrID, 0, 0) 1240 sktData := ns[0].SocketData 1241 if sktData.StreamsStarted != 2 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 0 || sktData.MessagesSent != 1 || sktData.MessagesReceived != 1 { 1242 return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (2, 2, 0, 1, 1), got (%d, %d, %d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived) 1243 } 1244 return true, nil 1245 }); err != nil { 1246 t.Fatal(err) 1247 } 1248 1249 doClientSideInitiatedFailedStream(tc, t) 1250 if err := verifyResultWithDelay(func() (bool, error) { 1251 ns, _ := channelz.GetServerSockets(svrID, 0, 0) 1252 sktData := ns[0].SocketData 1253 if sktData.StreamsStarted != 3 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 1 || sktData.MessagesSent != 2 || sktData.MessagesReceived != 2 { 1254 return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (3, 2, 1, 2, 2), got (%d, %d, %d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived) 1255 } 1256 return true, nil 1257 }); err != nil { 1258 t.Fatal(err) 1259 } 1260 } 1261 1262 func (s) TestCZServerSocketMetricsKeepAlive(t *testing.T) { 1263 defer func(t time.Duration) { internal.KeepaliveMinServerPingTime = t }(internal.KeepaliveMinServerPingTime) 1264 internal.KeepaliveMinServerPingTime = 50 * time.Millisecond 1265 1266 e := tcpClearRREnv 1267 te := newTest(t, e) 1268 // We setup the server keepalive parameters to send one keepalive every 1269 // 50ms, and verify that the actual number of keepalives is very close to 1270 // Time/50ms. We had a bug wherein the server was sending one keepalive 1271 // every [Time+Timeout] instead of every [Time] period, and since Timeout 1272 // is configured to a high value here, we should be able to verify that the 1273 // fix works with the above mentioned logic. 1274 kpOption := grpc.KeepaliveParams(keepalive.ServerParameters{ 1275 Time: 50 * time.Millisecond, 1276 Timeout: 5 * time.Second, 1277 }) 1278 te.customServerOptions = append(te.customServerOptions, kpOption) 1279 te.startServer(&testServer{security: e.security}) 1280 defer te.tearDown() 1281 cc := te.clientConn() 1282 1283 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1284 defer cancel() 1285 testutils.AwaitState(ctx, t, cc, connectivity.Ready) 1286 1287 // Allow about 5 pings to happen (250ms/50ms). 1288 time.Sleep(255 * time.Millisecond) 1289 1290 ss, _ := channelz.GetServers(0, 0) 1291 if len(ss) != 1 { 1292 t.Fatalf("there should be one server, not %d", len(ss)) 1293 } 1294 ns, _ := channelz.GetServerSockets(ss[0].ID, 0, 0) 1295 if len(ns) != 1 { 1296 t.Fatalf("there should be one server normal socket, not %d", len(ns)) 1297 } 1298 const wantMin, wantMax = 3, 7 1299 if got := ns[0].SocketData.KeepAlivesSent; got < wantMin || got > wantMax { 1300 t.Fatalf("got keepalivesCount: %v, want keepalivesCount: [%v,%v]", got, wantMin, wantMax) 1301 } 1302 } 1303 1304 var cipherSuites = []string{ 1305 "TLS_RSA_WITH_RC4_128_SHA", 1306 "TLS_RSA_WITH_3DES_EDE_CBC_SHA", 1307 "TLS_RSA_WITH_AES_128_CBC_SHA", 1308 "TLS_RSA_WITH_AES_256_CBC_SHA", 1309 "TLS_RSA_WITH_AES_128_GCM_SHA256", 1310 "TLS_RSA_WITH_AES_256_GCM_SHA384", 1311 "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA", 1312 "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA", 1313 "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA", 1314 "TLS_ECDHE_RSA_WITH_RC4_128_SHA", 1315 "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA", 1316 "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA", 1317 "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA", 1318 "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", 1319 "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256", 1320 "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", 1321 "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", 1322 "TLS_FALLBACK_SCSV", 1323 "TLS_RSA_WITH_AES_128_CBC_SHA256", 1324 "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256", 1325 "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256", 1326 "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305", 1327 "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305", 1328 "TLS_AES_128_GCM_SHA256", 1329 "TLS_AES_256_GCM_SHA384", 1330 "TLS_CHACHA20_POLY1305_SHA256", 1331 } 1332 1333 func (s) TestCZSocketGetSecurityValueTLS(t *testing.T) { 1334 e := tcpTLSRREnv 1335 te := newTest(t, e) 1336 te.startServer(&testServer{security: e.security}) 1337 defer te.tearDown() 1338 te.clientConn() 1339 if err := verifyResultWithDelay(func() (bool, error) { 1340 tchan, _ := channelz.GetTopChannels(0, 0) 1341 if len(tchan) != 1 { 1342 return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan)) 1343 } 1344 if len(tchan[0].SubChans) != 1 { 1345 return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans)) 1346 } 1347 var id int64 1348 for id = range tchan[0].SubChans { 1349 break 1350 } 1351 sc := channelz.GetSubChannel(id) 1352 if sc == nil { 1353 return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id) 1354 } 1355 if len(sc.Sockets) != 1 { 1356 return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets)) 1357 } 1358 for id = range sc.Sockets { 1359 break 1360 } 1361 skt := channelz.GetSocket(id) 1362 cert, _ := tls.LoadX509KeyPair(testdata.Path("x509/server1_cert.pem"), testdata.Path("x509/server1_key.pem")) 1363 securityVal, ok := skt.SocketData.Security.(*credentials.TLSChannelzSecurityValue) 1364 if !ok { 1365 return false, fmt.Errorf("the SocketData.Security is of type: %T, want: *credentials.TLSChannelzSecurityValue", skt.SocketData.Security) 1366 } 1367 if !cmp.Equal(securityVal.RemoteCertificate, cert.Certificate[0]) { 1368 return false, fmt.Errorf("SocketData.Security.RemoteCertificate got: %v, want: %v", securityVal.RemoteCertificate, cert.Certificate[0]) 1369 } 1370 for _, v := range cipherSuites { 1371 if v == securityVal.StandardName { 1372 return true, nil 1373 } 1374 } 1375 return false, fmt.Errorf("SocketData.Security.StandardName got: %v, want it to be one of %v", securityVal.StandardName, cipherSuites) 1376 }); err != nil { 1377 t.Fatal(err) 1378 } 1379 } 1380 1381 func (s) TestCZChannelTraceCreationDeletion(t *testing.T) { 1382 e := tcpClearRREnv 1383 // avoid calling API to set balancer type, which will void service config's change of balancer. 1384 e.balancer = "" 1385 te := newTest(t, e) 1386 r := manual.NewBuilderWithScheme("whatever") 1387 te.resolverScheme = r.Scheme() 1388 te.clientConn(grpc.WithResolvers(r)) 1389 resolvedAddrs := []resolver.Address{{Addr: "127.0.0.1:0", ServerName: "grpclb.server"}} 1390 grpclbConfig := parseServiceConfig(t, r, `{"loadBalancingPolicy": "grpclb"}`) 1391 r.UpdateState(grpclbstate.Set(resolver.State{ServiceConfig: grpclbConfig}, &grpclbstate.State{BalancerAddresses: resolvedAddrs})) 1392 defer te.tearDown() 1393 1394 var nestedConn int64 1395 if err := verifyResultWithDelay(func() (bool, error) { 1396 tcs, _ := channelz.GetTopChannels(0, 0) 1397 if len(tcs) != 1 { 1398 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1399 } 1400 if len(tcs[0].NestedChans) != 1 { 1401 return false, fmt.Errorf("there should be one nested channel from grpclb, not %d", len(tcs[0].NestedChans)) 1402 } 1403 for k := range tcs[0].NestedChans { 1404 nestedConn = k 1405 } 1406 for _, e := range tcs[0].Trace.Events { 1407 if e.RefID == nestedConn && e.RefType != channelz.RefChannel { 1408 return false, fmt.Errorf("nested channel trace event shoud have RefChannel as RefType") 1409 } 1410 } 1411 ncm := channelz.GetChannel(nestedConn) 1412 if ncm.Trace == nil { 1413 return false, fmt.Errorf("trace for nested channel should not be empty") 1414 } 1415 if len(ncm.Trace.Events) == 0 { 1416 return false, fmt.Errorf("there should be at least one trace event for nested channel not 0") 1417 } 1418 pattern := `Channel created` 1419 if ok, _ := regexp.MatchString(pattern, ncm.Trace.Events[0].Desc); !ok { 1420 return false, fmt.Errorf("the first trace event should be %q, not %q", pattern, ncm.Trace.Events[0].Desc) 1421 } 1422 return true, nil 1423 }); err != nil { 1424 t.Fatal(err) 1425 } 1426 1427 r.UpdateState(resolver.State{ 1428 Addresses: []resolver.Address{{Addr: "127.0.0.1:0"}}, 1429 ServiceConfig: parseServiceConfig(t, r, `{"loadBalancingPolicy": "round_robin"}`), 1430 }) 1431 1432 // wait for the shutdown of grpclb balancer 1433 if err := verifyResultWithDelay(func() (bool, error) { 1434 tcs, _ := channelz.GetTopChannels(0, 0) 1435 if len(tcs) != 1 { 1436 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1437 } 1438 if len(tcs[0].NestedChans) != 0 { 1439 return false, fmt.Errorf("there should be 0 nested channel from grpclb, not %d", len(tcs[0].NestedChans)) 1440 } 1441 ncm := channelz.GetChannel(nestedConn) 1442 if ncm == nil { 1443 return false, fmt.Errorf("nested channel should still exist due to parent's trace reference") 1444 } 1445 if ncm.Trace == nil { 1446 return false, fmt.Errorf("trace for nested channel should not be empty") 1447 } 1448 if len(ncm.Trace.Events) == 0 { 1449 return false, fmt.Errorf("there should be at least one trace event for nested channel not 0") 1450 } 1451 pattern := `Channel created` 1452 if ok, _ := regexp.MatchString(pattern, ncm.Trace.Events[0].Desc); !ok { 1453 return false, fmt.Errorf("the first trace event should be %q, not %q", pattern, ncm.Trace.Events[0].Desc) 1454 } 1455 return true, nil 1456 }); err != nil { 1457 t.Fatal(err) 1458 } 1459 } 1460 1461 func (s) TestCZSubChannelTraceCreationDeletion(t *testing.T) { 1462 e := tcpClearRREnv 1463 te := newTest(t, e) 1464 te.startServer(&testServer{security: e.security}) 1465 r := manual.NewBuilderWithScheme("whatever") 1466 r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: te.srvAddr}}}) 1467 te.resolverScheme = r.Scheme() 1468 te.clientConn(grpc.WithResolvers(r)) 1469 defer te.tearDown() 1470 var subConn int64 1471 // Here, we just wait for all sockets to be up. In the future, if we implement 1472 // IDLE, we may need to make several rpc calls to create the sockets. 1473 if err := verifyResultWithDelay(func() (bool, error) { 1474 tcs, _ := channelz.GetTopChannels(0, 0) 1475 if len(tcs) != 1 { 1476 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1477 } 1478 if len(tcs[0].SubChans) != 1 { 1479 return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) 1480 } 1481 for k := range tcs[0].SubChans { 1482 subConn = k 1483 } 1484 for _, e := range tcs[0].Trace.Events { 1485 if e.RefID == subConn && e.RefType != channelz.RefSubChannel { 1486 return false, fmt.Errorf("subchannel trace event shoud have RefType to be RefSubChannel") 1487 } 1488 } 1489 scm := channelz.GetSubChannel(subConn) 1490 if scm == nil { 1491 return false, fmt.Errorf("subChannel does not exist") 1492 } 1493 if scm.Trace == nil { 1494 return false, fmt.Errorf("trace for subChannel should not be empty") 1495 } 1496 if len(scm.Trace.Events) == 0 { 1497 return false, fmt.Errorf("there should be at least one trace event for subChannel not 0") 1498 } 1499 pattern := `Subchannel created` 1500 if ok, _ := regexp.MatchString(pattern, scm.Trace.Events[0].Desc); !ok { 1501 return false, fmt.Errorf("the first trace event should be %q, not %q", pattern, scm.Trace.Events[0].Desc) 1502 } 1503 return true, nil 1504 }); err != nil { 1505 t.Fatal(err) 1506 } 1507 1508 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1509 defer cancel() 1510 testutils.AwaitState(ctx, t, te.cc, connectivity.Ready) 1511 r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "fake address"}}}) 1512 testutils.AwaitNotState(ctx, t, te.cc, connectivity.Ready) 1513 1514 if err := verifyResultWithDelay(func() (bool, error) { 1515 tcs, _ := channelz.GetTopChannels(0, 0) 1516 if len(tcs) != 1 { 1517 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1518 } 1519 if len(tcs[0].SubChans) != 1 { 1520 return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) 1521 } 1522 scm := channelz.GetSubChannel(subConn) 1523 if scm == nil { 1524 return false, fmt.Errorf("subChannel should still exist due to parent's trace reference") 1525 } 1526 if scm.Trace == nil { 1527 return false, fmt.Errorf("trace for SubChannel should not be empty") 1528 } 1529 if len(scm.Trace.Events) == 0 { 1530 return false, fmt.Errorf("there should be at least one trace event for subChannel not 0") 1531 } 1532 1533 pattern := `Subchannel deleted` 1534 desc := scm.Trace.Events[len(scm.Trace.Events)-1].Desc 1535 if ok, _ := regexp.MatchString(pattern, desc); !ok { 1536 return false, fmt.Errorf("the last trace event should be %q, not %q", pattern, desc) 1537 } 1538 return true, nil 1539 }); err != nil { 1540 t.Fatal(err) 1541 } 1542 } 1543 1544 func (s) TestCZChannelAddressResolutionChange(t *testing.T) { 1545 e := tcpClearRREnv 1546 e.balancer = "" 1547 te := newTest(t, e) 1548 te.startServer(&testServer{security: e.security}) 1549 r := manual.NewBuilderWithScheme("whatever") 1550 addrs := []resolver.Address{{Addr: te.srvAddr}} 1551 r.InitialState(resolver.State{Addresses: addrs}) 1552 te.resolverScheme = r.Scheme() 1553 te.clientConn(grpc.WithResolvers(r)) 1554 defer te.tearDown() 1555 var cid int64 1556 // Here, we just wait for all sockets to be up. In the future, if we implement 1557 // IDLE, we may need to make several rpc calls to create the sockets. 1558 if err := verifyResultWithDelay(func() (bool, error) { 1559 tcs, _ := channelz.GetTopChannels(0, 0) 1560 if len(tcs) != 1 { 1561 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1562 } 1563 cid = tcs[0].ID 1564 for i := len(tcs[0].Trace.Events) - 1; i >= 0; i-- { 1565 if strings.Contains(tcs[0].Trace.Events[i].Desc, "resolver returned new addresses") { 1566 break 1567 } 1568 if i == 0 { 1569 return false, fmt.Errorf("events do not contain expected address resolution from empty address state. Got: %+v", tcs[0].Trace.Events) 1570 } 1571 } 1572 return true, nil 1573 }); err != nil { 1574 t.Fatal(err) 1575 } 1576 r.UpdateState(resolver.State{ 1577 Addresses: addrs, 1578 ServiceConfig: parseServiceConfig(t, r, `{"loadBalancingPolicy": "round_robin"}`), 1579 }) 1580 1581 if err := verifyResultWithDelay(func() (bool, error) { 1582 cm := channelz.GetChannel(cid) 1583 for i := len(cm.Trace.Events) - 1; i >= 0; i-- { 1584 if strings.Contains(cm.Trace.Events[i].Desc, fmt.Sprintf("Channel switches to new LB policy %q", roundrobin.Name)) { 1585 break 1586 } 1587 if i == 0 { 1588 return false, fmt.Errorf("events do not contain expected address resolution change of LB policy") 1589 } 1590 } 1591 return true, nil 1592 }); err != nil { 1593 t.Fatal(err) 1594 } 1595 1596 newSC := parseServiceConfig(t, r, `{ 1597 "methodConfig": [ 1598 { 1599 "name": [ 1600 { 1601 "service": "grpc.testing.TestService", 1602 "method": "EmptyCall" 1603 } 1604 ], 1605 "waitForReady": false, 1606 "timeout": ".001s" 1607 } 1608 ] 1609 }`) 1610 r.UpdateState(resolver.State{Addresses: addrs, ServiceConfig: newSC}) 1611 1612 if err := verifyResultWithDelay(func() (bool, error) { 1613 cm := channelz.GetChannel(cid) 1614 1615 var es []string 1616 for i := len(cm.Trace.Events) - 1; i >= 0; i-- { 1617 if strings.Contains(cm.Trace.Events[i].Desc, "service config updated") { 1618 break 1619 } 1620 es = append(es, cm.Trace.Events[i].Desc) 1621 if i == 0 { 1622 return false, fmt.Errorf("events do not contain expected address resolution of new service config\n Events:\n%v", strings.Join(es, "\n")) 1623 } 1624 } 1625 return true, nil 1626 }); err != nil { 1627 t.Fatal(err) 1628 } 1629 1630 r.UpdateState(resolver.State{Addresses: []resolver.Address{}, ServiceConfig: newSC}) 1631 1632 if err := verifyResultWithDelay(func() (bool, error) { 1633 cm := channelz.GetChannel(cid) 1634 for i := len(cm.Trace.Events) - 1; i >= 0; i-- { 1635 if strings.Contains(cm.Trace.Events[i].Desc, "resolver returned an empty address list") { 1636 break 1637 } 1638 if i == 0 { 1639 return false, fmt.Errorf("events do not contain expected address resolution of empty address") 1640 } 1641 } 1642 return true, nil 1643 }); err != nil { 1644 t.Fatal(err) 1645 } 1646 } 1647 1648 func (s) TestCZSubChannelPickedNewAddress(t *testing.T) { 1649 e := tcpClearRREnv 1650 e.balancer = "" 1651 te := newTest(t, e) 1652 te.startServers(&testServer{security: e.security}, 3) 1653 r := manual.NewBuilderWithScheme("whatever") 1654 var svrAddrs []resolver.Address 1655 for _, a := range te.srvAddrs { 1656 svrAddrs = append(svrAddrs, resolver.Address{Addr: a}) 1657 } 1658 r.InitialState(resolver.State{Addresses: svrAddrs}) 1659 te.resolverScheme = r.Scheme() 1660 cc := te.clientConn(grpc.WithResolvers(r)) 1661 defer te.tearDown() 1662 tc := testgrpc.NewTestServiceClient(cc) 1663 // make sure the connection is up 1664 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1665 defer cancel() 1666 if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil { 1667 t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err) 1668 } 1669 te.srvs[0].Stop() 1670 te.srvs[1].Stop() 1671 // Here, we just wait for all sockets to be up. Make several rpc calls to 1672 // create the sockets since we do not automatically reconnect. 1673 done := make(chan struct{}) 1674 defer close(done) 1675 go func() { 1676 for { 1677 tc.EmptyCall(ctx, &testpb.Empty{}) 1678 select { 1679 case <-time.After(10 * time.Millisecond): 1680 case <-done: 1681 return 1682 } 1683 } 1684 }() 1685 if err := verifyResultWithDelay(func() (bool, error) { 1686 tcs, _ := channelz.GetTopChannels(0, 0) 1687 if len(tcs) != 1 { 1688 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1689 } 1690 if len(tcs[0].SubChans) != 1 { 1691 return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) 1692 } 1693 var subConn int64 1694 for k := range tcs[0].SubChans { 1695 subConn = k 1696 } 1697 scm := channelz.GetSubChannel(subConn) 1698 if scm.Trace == nil { 1699 return false, fmt.Errorf("trace for SubChannel should not be empty") 1700 } 1701 if len(scm.Trace.Events) == 0 { 1702 return false, fmt.Errorf("there should be at least one trace event for subChannel not 0") 1703 } 1704 for i := len(scm.Trace.Events) - 1; i >= 0; i-- { 1705 if strings.Contains(scm.Trace.Events[i].Desc, fmt.Sprintf("Subchannel picks a new address %q to connect", te.srvAddrs[2])) { 1706 break 1707 } 1708 if i == 0 { 1709 return false, fmt.Errorf("events do not contain expected address resolution of subchannel picked new address") 1710 } 1711 } 1712 return true, nil 1713 }); err != nil { 1714 t.Fatal(err) 1715 } 1716 } 1717 1718 func (s) TestCZSubChannelConnectivityState(t *testing.T) { 1719 e := tcpClearRREnv 1720 te := newTest(t, e) 1721 te.startServer(&testServer{security: e.security}) 1722 r := manual.NewBuilderWithScheme("whatever") 1723 r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: te.srvAddr}}}) 1724 te.resolverScheme = r.Scheme() 1725 cc := te.clientConn(grpc.WithResolvers(r)) 1726 defer te.tearDown() 1727 tc := testgrpc.NewTestServiceClient(cc) 1728 // make sure the connection is up 1729 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1730 defer cancel() 1731 if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil { 1732 t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err) 1733 } 1734 te.srv.Stop() 1735 1736 var subConn int64 1737 if err := verifyResultWithDelay(func() (bool, error) { 1738 // we need to obtain the SubChannel id before it gets deleted from Channel's children list (due 1739 // to effect of r.UpdateState(resolver.State{Addresses:[]resolver.Address{}})) 1740 if subConn == 0 { 1741 tcs, _ := channelz.GetTopChannels(0, 0) 1742 if len(tcs) != 1 { 1743 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1744 } 1745 if len(tcs[0].SubChans) != 1 { 1746 return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) 1747 } 1748 for k := range tcs[0].SubChans { 1749 // get the SubChannel id for further trace inquiry. 1750 subConn = k 1751 t.Logf("SubChannel Id is %d", subConn) 1752 } 1753 } 1754 scm := channelz.GetSubChannel(subConn) 1755 if scm == nil { 1756 return false, fmt.Errorf("subChannel should still exist due to parent's trace reference") 1757 } 1758 if scm.Trace == nil { 1759 return false, fmt.Errorf("trace for SubChannel should not be empty") 1760 } 1761 if len(scm.Trace.Events) == 0 { 1762 return false, fmt.Errorf("there should be at least one trace event for subChannel not 0") 1763 } 1764 var ready, connecting, transient, shutdown int 1765 t.Log("SubChannel trace events seen so far...") 1766 for _, e := range scm.Trace.Events { 1767 t.Log(e.Desc) 1768 if strings.Contains(e.Desc, fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.TransientFailure)) { 1769 transient++ 1770 } 1771 } 1772 // Make sure the SubChannel has already seen transient failure before shutting it down through 1773 // r.UpdateState(resolver.State{Addresses:[]resolver.Address{}}). 1774 if transient == 0 { 1775 return false, fmt.Errorf("transient failure has not happened on SubChannel yet") 1776 } 1777 transient = 0 1778 r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "fake address"}}}) 1779 t.Log("SubChannel trace events seen so far...") 1780 for _, e := range scm.Trace.Events { 1781 t.Log(e.Desc) 1782 if strings.Contains(e.Desc, fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.Ready)) { 1783 ready++ 1784 } 1785 if strings.Contains(e.Desc, fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.Connecting)) { 1786 connecting++ 1787 } 1788 if strings.Contains(e.Desc, fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.TransientFailure)) { 1789 transient++ 1790 } 1791 if strings.Contains(e.Desc, fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.Shutdown)) { 1792 shutdown++ 1793 } 1794 } 1795 // example: 1796 // Subchannel Created 1797 // Subchannel's connectivity state changed to CONNECTING 1798 // Subchannel picked a new address: "localhost:36011" 1799 // Subchannel's connectivity state changed to READY 1800 // Subchannel's connectivity state changed to TRANSIENT_FAILURE 1801 // Subchannel's connectivity state changed to CONNECTING 1802 // Subchannel picked a new address: "localhost:36011" 1803 // Subchannel's connectivity state changed to SHUTDOWN 1804 // Subchannel Deleted 1805 if ready != 1 || connecting < 1 || transient < 1 || shutdown != 1 { 1806 return false, fmt.Errorf("got: ready = %d, connecting = %d, transient = %d, shutdown = %d, want: 1, >=1, >=1, 1", ready, connecting, transient, shutdown) 1807 } 1808 1809 return true, nil 1810 }); err != nil { 1811 t.Fatal(err) 1812 } 1813 } 1814 1815 func (s) TestCZChannelConnectivityState(t *testing.T) { 1816 e := tcpClearRREnv 1817 te := newTest(t, e) 1818 te.startServer(&testServer{security: e.security}) 1819 r := manual.NewBuilderWithScheme("whatever") 1820 r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: te.srvAddr}}}) 1821 te.resolverScheme = r.Scheme() 1822 cc := te.clientConn(grpc.WithResolvers(r)) 1823 defer te.tearDown() 1824 tc := testgrpc.NewTestServiceClient(cc) 1825 // make sure the connection is up 1826 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1827 defer cancel() 1828 if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil { 1829 t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err) 1830 } 1831 te.srv.Stop() 1832 1833 if err := verifyResultWithDelay(func() (bool, error) { 1834 tcs, _ := channelz.GetTopChannels(0, 0) 1835 if len(tcs) != 1 { 1836 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1837 } 1838 1839 var ready, connecting, transient int 1840 t.Log("Channel trace events seen so far...") 1841 for _, e := range tcs[0].Trace.Events { 1842 t.Log(e.Desc) 1843 if strings.Contains(e.Desc, fmt.Sprintf("Channel Connectivity change to %v", connectivity.Ready)) { 1844 ready++ 1845 } 1846 if strings.Contains(e.Desc, fmt.Sprintf("Channel Connectivity change to %v", connectivity.Connecting)) { 1847 connecting++ 1848 } 1849 if strings.Contains(e.Desc, fmt.Sprintf("Channel Connectivity change to %v", connectivity.TransientFailure)) { 1850 transient++ 1851 } 1852 } 1853 1854 // example: 1855 // Channel Created 1856 // Adressses resolved (from empty address state): "localhost:40467" 1857 // SubChannel (id: 4[]) Created 1858 // Channel's connectivity state changed to CONNECTING 1859 // Channel's connectivity state changed to READY 1860 // Channel's connectivity state changed to TRANSIENT_FAILURE 1861 // Channel's connectivity state changed to CONNECTING 1862 // Channel's connectivity state changed to TRANSIENT_FAILURE 1863 if ready != 1 || connecting < 1 || transient < 1 { 1864 return false, fmt.Errorf("got: ready = %d, connecting = %d, transient = %d, want: 1, >=1, >=1", ready, connecting, transient) 1865 } 1866 return true, nil 1867 }); err != nil { 1868 t.Fatal(err) 1869 } 1870 } 1871 1872 func (s) TestCZTraceOverwriteChannelDeletion(t *testing.T) { 1873 e := tcpClearRREnv 1874 e.balancer = "" 1875 te := newTest(t, e) 1876 channelz.SetMaxTraceEntry(1) 1877 defer channelz.ResetMaxTraceEntryToDefault() 1878 r := manual.NewBuilderWithScheme("whatever") 1879 te.resolverScheme = r.Scheme() 1880 te.clientConn(grpc.WithResolvers(r)) 1881 resolvedAddrs := []resolver.Address{{Addr: "127.0.0.1:0", ServerName: "grpclb.server"}} 1882 grpclbConfig := parseServiceConfig(t, r, `{"loadBalancingPolicy": "grpclb"}`) 1883 r.UpdateState(grpclbstate.Set(resolver.State{ServiceConfig: grpclbConfig}, &grpclbstate.State{BalancerAddresses: resolvedAddrs})) 1884 defer te.tearDown() 1885 var nestedConn int64 1886 if err := verifyResultWithDelay(func() (bool, error) { 1887 tcs, _ := channelz.GetTopChannels(0, 0) 1888 if len(tcs) != 1 { 1889 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1890 } 1891 if len(tcs[0].NestedChans) != 1 { 1892 return false, fmt.Errorf("there should be one nested channel from grpclb, not %d", len(tcs[0].NestedChans)) 1893 } 1894 for k := range tcs[0].NestedChans { 1895 nestedConn = k 1896 } 1897 return true, nil 1898 }); err != nil { 1899 t.Fatal(err) 1900 } 1901 1902 r.UpdateState(resolver.State{ 1903 Addresses: []resolver.Address{{Addr: "127.0.0.1:0"}}, 1904 ServiceConfig: parseServiceConfig(t, r, `{"loadBalancingPolicy": "round_robin"}`), 1905 }) 1906 1907 // wait for the shutdown of grpclb balancer 1908 if err := verifyResultWithDelay(func() (bool, error) { 1909 tcs, _ := channelz.GetTopChannels(0, 0) 1910 if len(tcs) != 1 { 1911 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1912 } 1913 if len(tcs[0].NestedChans) != 0 { 1914 return false, fmt.Errorf("there should be 0 nested channel from grpclb, not %d", len(tcs[0].NestedChans)) 1915 } 1916 return true, nil 1917 }); err != nil { 1918 t.Fatal(err) 1919 } 1920 1921 // If nested channel deletion is last trace event before the next validation, it will fail, as the top channel will hold a reference to it. 1922 // This line forces a trace event on the top channel in that case. 1923 r.UpdateState(resolver.State{ 1924 Addresses: []resolver.Address{{Addr: "127.0.0.1:0"}}, 1925 ServiceConfig: parseServiceConfig(t, r, `{"loadBalancingPolicy": "round_robin"}`), 1926 }) 1927 1928 // verify that the nested channel no longer exist due to trace referencing it got overwritten. 1929 if err := verifyResultWithDelay(func() (bool, error) { 1930 cm := channelz.GetChannel(nestedConn) 1931 if cm != nil { 1932 return false, fmt.Errorf("nested channel should have been deleted since its parent's trace should not contain any reference to it anymore") 1933 } 1934 return true, nil 1935 }); err != nil { 1936 t.Fatal(err) 1937 } 1938 } 1939 1940 func (s) TestCZTraceOverwriteSubChannelDeletion(t *testing.T) { 1941 e := tcpClearRREnv 1942 te := newTest(t, e) 1943 channelz.SetMaxTraceEntry(1) 1944 defer channelz.ResetMaxTraceEntryToDefault() 1945 te.startServer(&testServer{security: e.security}) 1946 r := manual.NewBuilderWithScheme("whatever") 1947 r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: te.srvAddr}}}) 1948 te.resolverScheme = r.Scheme() 1949 te.clientConn(grpc.WithResolvers(r)) 1950 defer te.tearDown() 1951 var subConn int64 1952 // Here, we just wait for all sockets to be up. In the future, if we implement 1953 // IDLE, we may need to make several rpc calls to create the sockets. 1954 if err := verifyResultWithDelay(func() (bool, error) { 1955 tcs, _ := channelz.GetTopChannels(0, 0) 1956 if len(tcs) != 1 { 1957 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1958 } 1959 if len(tcs[0].SubChans) != 1 { 1960 return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) 1961 } 1962 for k := range tcs[0].SubChans { 1963 subConn = k 1964 } 1965 return true, nil 1966 }); err != nil { 1967 t.Fatal(err) 1968 } 1969 1970 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1971 defer cancel() 1972 testutils.AwaitState(ctx, t, te.cc, connectivity.Ready) 1973 r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "fake address"}}}) 1974 testutils.AwaitNotState(ctx, t, te.cc, connectivity.Ready) 1975 1976 // verify that the subchannel no longer exist due to trace referencing it got overwritten. 1977 if err := verifyResultWithDelay(func() (bool, error) { 1978 cm := channelz.GetChannel(subConn) 1979 if cm != nil { 1980 return false, fmt.Errorf("subchannel should have been deleted since its parent's trace should not contain any reference to it anymore") 1981 } 1982 return true, nil 1983 }); err != nil { 1984 t.Fatal(err) 1985 } 1986 } 1987 1988 func (s) TestCZTraceTopChannelDeletionTraceClear(t *testing.T) { 1989 e := tcpClearRREnv 1990 te := newTest(t, e) 1991 te.startServer(&testServer{security: e.security}) 1992 r := manual.NewBuilderWithScheme("whatever") 1993 r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: te.srvAddr}}}) 1994 te.resolverScheme = r.Scheme() 1995 te.clientConn(grpc.WithResolvers(r)) 1996 var subConn int64 1997 // Here, we just wait for all sockets to be up. In the future, if we implement 1998 // IDLE, we may need to make several rpc calls to create the sockets. 1999 if err := verifyResultWithDelay(func() (bool, error) { 2000 tcs, _ := channelz.GetTopChannels(0, 0) 2001 if len(tcs) != 1 { 2002 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 2003 } 2004 if len(tcs[0].SubChans) != 1 { 2005 return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) 2006 } 2007 for k := range tcs[0].SubChans { 2008 subConn = k 2009 } 2010 return true, nil 2011 }); err != nil { 2012 t.Fatal(err) 2013 } 2014 te.tearDown() 2015 // verify that the subchannel no longer exist due to parent channel got deleted and its trace cleared. 2016 if err := verifyResultWithDelay(func() (bool, error) { 2017 cm := channelz.GetChannel(subConn) 2018 if cm != nil { 2019 return false, fmt.Errorf("subchannel should have been deleted since its parent's trace should not contain any reference to it anymore") 2020 } 2021 return true, nil 2022 }); err != nil { 2023 t.Fatal(err) 2024 } 2025 }