gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/grpc/test/channelz_test.go

/*
 *
 * Copyright 2018 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package test

import (
	"context"
	"fmt"
	"net"
	"reflect"
	"strings"
	"sync"
	"testing"
	"time"

	tls "gitee.com/ks-custle/core-gm/gmtls"

	grpc "gitee.com/ks-custle/core-gm/grpc"
	_ "gitee.com/ks-custle/core-gm/grpc/balancer/grpclb"
	"gitee.com/ks-custle/core-gm/grpc/balancer/roundrobin"
	"gitee.com/ks-custle/core-gm/grpc/codes"
	"gitee.com/ks-custle/core-gm/grpc/connectivity"
	"gitee.com/ks-custle/core-gm/grpc/credentials"
	"gitee.com/ks-custle/core-gm/grpc/internal"
	"gitee.com/ks-custle/core-gm/grpc/internal/channelz"
	"gitee.com/ks-custle/core-gm/grpc/internal/stubserver"
	"gitee.com/ks-custle/core-gm/grpc/keepalive"
	"gitee.com/ks-custle/core-gm/grpc/resolver"
	"gitee.com/ks-custle/core-gm/grpc/resolver/manual"
	"gitee.com/ks-custle/core-gm/grpc/status"
	testpb "gitee.com/ks-custle/core-gm/grpc/test/grpc_testing"
	"gitee.com/ks-custle/core-gm/grpc/testdata"
	"gitee.com/ks-custle/core-gm/net/http2"
)

func czCleanupWrapper(cleanup func() error, t *testing.T) {
	if err := cleanup(); err != nil {
		t.Error(err)
	}
}

func verifyResultWithDelay(f func() (bool, error)) error {
	var ok bool
	var err error
	for i := 0; i < 1000; i++ {
		if ok, err = f(); ok {
			return nil
		}
		time.Sleep(10 * time.Millisecond)
	}
	return err
}
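
// verifyResultWithDelay polls its check for up to 1000*10ms = 10s before
// giving up and returning the last error. A minimal usage sketch, assuming a
// hypothetical check against global channelz state (this helper is not used
// by the tests in this file):
func exampleVerifyOneServer() error {
	return verifyResultWithDelay(func() (bool, error) {
		if ss, _ := channelz.GetServers(0, 0); len(ss) != 1 {
			return false, fmt.Errorf("want exactly 1 server, got %d", len(ss))
		}
		return true, nil
	})
}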

func (s) TestCZServerRegistrationAndDeletion(t *testing.T) {
	testcases := []struct {
		total  int
		start  int64
		max    int64
		length int64
		end    bool
	}{
		{total: int(channelz.EntryPerPage), start: 0, max: 0, length: channelz.EntryPerPage, end: true},
		{total: int(channelz.EntryPerPage) - 1, start: 0, max: 0, length: channelz.EntryPerPage - 1, end: true},
		{total: int(channelz.EntryPerPage) + 1, start: 0, max: 0, length: channelz.EntryPerPage, end: false},
		{total: int(channelz.EntryPerPage) + 1, start: int64(2*(channelz.EntryPerPage+1) + 1), max: 0, length: 0, end: true},
		{total: int(channelz.EntryPerPage), start: 0, max: 1, length: 1, end: false},
		{total: int(channelz.EntryPerPage), start: 0, max: channelz.EntryPerPage - 1, length: channelz.EntryPerPage - 1, end: false},
	}

	for _, c := range testcases {
		czCleanup := channelz.NewChannelzStorage()
		defer czCleanupWrapper(czCleanup, t)
		e := tcpClearRREnv
		te := newTest(t, e)
		te.startServers(&testServer{security: e.security}, c.total)

		ss, end := channelz.GetServers(c.start, c.max)
		if int64(len(ss)) != c.length || end != c.end {
			t.Fatalf("GetServers(%d) = %+v (len of which: %d), end: %+v, want len(GetServers(%d)) = %d, end: %+v", c.start, ss, len(ss), end, c.start, c.length, c.end)
		}
		te.tearDown()
		ss, end = channelz.GetServers(c.start, c.max)
		if len(ss) != 0 || !end {
			t.Fatalf("GetServers(0) = %+v (len of which: %d), end: %+v, want len(GetServers(0)) = 0, end: true", ss, len(ss), end)
		}
	}
}

func (s) TestCZGetServer(t *testing.T) {
	czCleanup := channelz.NewChannelzStorage()
	defer czCleanupWrapper(czCleanup, t)
	e := tcpClearRREnv
	te := newTest(t, e)
	te.startServer(&testServer{security: e.security})
	defer te.tearDown()

	ss, _ := channelz.GetServers(0, 0)
	if len(ss) != 1 {
		t.Fatalf("there should only be one server, not %d", len(ss))
	}

	serverID := ss[0].ID
	srv := channelz.GetServer(serverID)
	if srv == nil {
		t.Fatalf("server %d does not exist", serverID)
	}
	if srv.ID != serverID {
		t.Fatalf("server want id %d, but got %d", serverID, srv.ID)
	}

	te.tearDown()

	if err := verifyResultWithDelay(func() (bool, error) {
		srv := channelz.GetServer(serverID)
		if srv != nil {
			return false, fmt.Errorf("server %d should not exist", serverID)
		}

		return true, nil
	}); err != nil {
		t.Fatal(err)
	}
}

func (s) TestCZTopChannelRegistrationAndDeletion(t *testing.T) {
	testcases := []struct {
		total  int
		start  int64
		max    int64
		length int64
		end    bool
	}{
		{total: int(channelz.EntryPerPage), start: 0, max: 0, length: channelz.EntryPerPage, end: true},
		{total: int(channelz.EntryPerPage) - 1, start: 0, max: 0, length: channelz.EntryPerPage - 1, end: true},
		{total: int(channelz.EntryPerPage) + 1, start: 0, max: 0, length: channelz.EntryPerPage, end: false},
		{total: int(channelz.EntryPerPage) + 1, start: int64(2*(channelz.EntryPerPage+1) + 1), max: 0, length: 0, end: true},
		{total: int(channelz.EntryPerPage), start: 0, max: 1, length: 1, end: false},
		{total: int(channelz.EntryPerPage), start: 0, max: channelz.EntryPerPage - 1, length: channelz.EntryPerPage - 1, end: false},
	}

	for _, c := range testcases {
		czCleanup := channelz.NewChannelzStorage()
		defer czCleanupWrapper(czCleanup, t)
		e := tcpClearRREnv
		te := newTest(t, e)
		var ccs []*grpc.ClientConn
		for i := 0; i < c.total; i++ {
			cc := te.clientConn()
			te.cc = nil
			// avoid making the next dial block
			te.srvAddr = ""
			ccs = append(ccs, cc)
		}
		if err := verifyResultWithDelay(func() (bool, error) {
			if tcs, end := channelz.GetTopChannels(c.start, c.max); int64(len(tcs)) != c.length || end != c.end {
				return false, fmt.Errorf("getTopChannels(%d) = %+v (len of which: %d), end: %+v, want len(GetTopChannels(%d)) = %d, end: %+v", c.start, tcs, len(tcs), end, c.start, c.length, c.end)
			}
			return true, nil
		}); err != nil {
			t.Fatal(err)
		}

		for _, cc := range ccs {
			cc.Close()
		}

		if err := verifyResultWithDelay(func() (bool, error) {
			if tcs, end := channelz.GetTopChannels(c.start, c.max); len(tcs) != 0 || !end {
				return false, fmt.Errorf("getTopChannels(0) = %+v (len of which: %d), end: %+v, want len(GetTopChannels(0)) = 0, end: true", tcs, len(tcs), end)
			}
			return true, nil
		}); err != nil {
			t.Fatal(err)
		}
		te.tearDown()
	}
}
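
// GetServers and GetTopChannels paginate by entity ID: results start at the
// first entry with ID >= start, return at most max entries (channelz.EntryPerPage
// when max == 0), and the boolean reports whether the listing is exhausted,
// which is what the pagination tables above exercise. A minimal sketch of
// walking every page (hypothetical helper, not used by the tests):
func exampleListAllServerIDs() []int64 {
	var ids []int64
	var start int64
	for {
		page, end := channelz.GetServers(start, 0)
		for _, srv := range page {
			ids = append(ids, srv.ID)
		}
		if end || len(page) == 0 {
			break
		}
		start = ids[len(ids)-1] + 1
	}
	return ids
}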

func (s) TestCZTopChannelRegistrationAndDeletionWhenDialFail(t *testing.T) {
	czCleanup := channelz.NewChannelzStorage()
	defer czCleanupWrapper(czCleanup, t)
	// Make dial fail (due to no transport security specified)
	_, err := grpc.Dial("fake.addr")
	if err == nil {
		t.Fatal("expecting dial to fail")
	}
	if tcs, end := channelz.GetTopChannels(0, 0); tcs != nil || !end {
		t.Fatalf("GetTopChannels(0, 0) = %v, %v, want <nil>, true", tcs, end)
	}
}

func (s) TestCZNestedChannelRegistrationAndDeletion(t *testing.T) {
	czCleanup := channelz.NewChannelzStorage()
	defer czCleanupWrapper(czCleanup, t)
	e := tcpClearRREnv
	// avoid calling the API to set the balancer type, which would override the
	// service config's choice of balancer.
	e.balancer = ""
	te := newTest(t, e)
	r := manual.NewBuilderWithScheme("whatever")
	resolvedAddrs := []resolver.Address{{Addr: "127.0.0.1:0", Type: resolver.GRPCLB, ServerName: "grpclb.server"}}
	r.InitialState(resolver.State{Addresses: resolvedAddrs})
	te.resolverScheme = r.Scheme()
	te.clientConn(grpc.WithResolvers(r))
	defer te.tearDown()

	if err := verifyResultWithDelay(func() (bool, error) {
		tcs, _ := channelz.GetTopChannels(0, 0)
		if len(tcs) != 1 {
			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
		}
		if len(tcs[0].NestedChans) != 1 {
			return false, fmt.Errorf("there should be one nested channel from grpclb, not %d", len(tcs[0].NestedChans))
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}

	r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "127.0.0.1:0"}}, ServiceConfig: parseCfg(r, `{"loadBalancingPolicy": "round_robin"}`)})

	// wait for the shutdown of the grpclb balancer
	if err := verifyResultWithDelay(func() (bool, error) {
		tcs, _ := channelz.GetTopChannels(0, 0)
		if len(tcs) != 1 {
			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
		}
		if len(tcs[0].NestedChans) != 0 {
			return false, fmt.Errorf("there should be 0 nested channels from grpclb, not %d", len(tcs[0].NestedChans))
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}
}
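
// The nested-channel test above hinges on handing the client a grpclb-typed
// address, which makes the grpclb balancer create a child (nested) channel to
// the balancer server. A minimal sketch of that resolver state (hypothetical
// helper, not used by the tests):
func exampleGRPCLBResolverState() resolver.State {
	return resolver.State{Addresses: []resolver.Address{
		{Addr: "127.0.0.1:0", Type: resolver.GRPCLB, ServerName: "grpclb.server"},
	}}
}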

func (s) TestCZClientSubChannelSocketRegistrationAndDeletion(t *testing.T) {
	czCleanup := channelz.NewChannelzStorage()
	defer czCleanupWrapper(czCleanup, t)
	e := tcpClearRREnv
	num := 3 // number of backends
	te := newTest(t, e)
	var svrAddrs []resolver.Address
	te.startServers(&testServer{security: e.security}, num)
	r := manual.NewBuilderWithScheme("whatever")
	for _, a := range te.srvAddrs {
		svrAddrs = append(svrAddrs, resolver.Address{Addr: a})
	}
	r.InitialState(resolver.State{Addresses: svrAddrs})
	te.resolverScheme = r.Scheme()
	te.clientConn(grpc.WithResolvers(r))
	defer te.tearDown()
	// Here, we just wait for all sockets to be up. In the future, if we implement
	// IDLE, we may need to make several rpc calls to create the sockets.
	if err := verifyResultWithDelay(func() (bool, error) {
		tcs, _ := channelz.GetTopChannels(0, 0)
		if len(tcs) != 1 {
			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
		}
		if len(tcs[0].SubChans) != num {
			return false, fmt.Errorf("there should be %d subchannel not %d", num, len(tcs[0].SubChans))
		}
		count := 0
		for k := range tcs[0].SubChans {
			sc := channelz.GetSubChannel(k)
			if sc == nil {
				return false, fmt.Errorf("got <nil> subchannel")
			}
			count += len(sc.Sockets)
		}
		if count != num {
			return false, fmt.Errorf("there should be %d sockets not %d", num, count)
		}

		return true, nil
	}); err != nil {
		t.Fatal(err)
	}

	r.UpdateState(resolver.State{Addresses: svrAddrs[:len(svrAddrs)-1]})

	if err := verifyResultWithDelay(func() (bool, error) {
		tcs, _ := channelz.GetTopChannels(0, 0)
		if len(tcs) != 1 {
			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
		}
		if len(tcs[0].SubChans) != num-1 {
			return false, fmt.Errorf("there should be %d subchannel not %d", num-1, len(tcs[0].SubChans))
		}
		count := 0
		for k := range tcs[0].SubChans {
			sc := channelz.GetSubChannel(k)
			if sc == nil {
				return false, fmt.Errorf("got <nil> subchannel")
			}
			count += len(sc.Sockets)
		}
		if count != num-1 {
			return false, fmt.Errorf("there should be %d sockets not %d", num-1, count)
		}

		return true, nil
	}); err != nil {
		t.Fatal(err)
	}
}

func (s) TestCZServerSocketRegistrationAndDeletion(t *testing.T) {
	testcases := []struct {
		total  int
		start  int64
		max    int64
		length int64
		end    bool
	}{
		{total: int(channelz.EntryPerPage), start: 0, max: 0, length: channelz.EntryPerPage, end: true},
		{total: int(channelz.EntryPerPage) - 1, start: 0, max: 0, length: channelz.EntryPerPage - 1, end: true},
		{total: int(channelz.EntryPerPage) + 1, start: 0, max: 0, length: channelz.EntryPerPage, end: false},
		{total: int(channelz.EntryPerPage), start: 1, max: 0, length: channelz.EntryPerPage - 1, end: true},
		{total: int(channelz.EntryPerPage) + 1, start: channelz.EntryPerPage + 1, max: 0, length: 0, end: true},
		{total: int(channelz.EntryPerPage), start: 0, max: 1, length: 1, end: false},
		{total: int(channelz.EntryPerPage), start: 0, max: channelz.EntryPerPage - 1, length: channelz.EntryPerPage - 1, end: false},
	}

	for _, c := range testcases {
		czCleanup := channelz.NewChannelzStorage()
		defer czCleanupWrapper(czCleanup, t)
		e := tcpClearRREnv
		te := newTest(t, e)
		te.startServer(&testServer{security: e.security})
		var ccs []*grpc.ClientConn
		for i := 0; i < c.total; i++ {
			cc := te.clientConn()
			te.cc = nil
			ccs = append(ccs, cc)
		}

		var svrID int64
		if err := verifyResultWithDelay(func() (bool, error) {
			ss, _ := channelz.GetServers(0, 0)
			if len(ss) != 1 {
				return false, fmt.Errorf("there should only be one server, not %d", len(ss))
			}
			if len(ss[0].ListenSockets) != 1 {
				return false, fmt.Errorf("there should only be one server listen socket, not %d", len(ss[0].ListenSockets))
			}

			startID := c.start
			if startID != 0 {
				ns, _ := channelz.GetServerSockets(ss[0].ID, 0, int64(c.total))
				if int64(len(ns)) < c.start {
					return false, fmt.Errorf("there should be more than %d sockets, not %d", c.start, len(ns))
				}
				startID = ns[c.start-1].ID + 1
			}

			ns, end := channelz.GetServerSockets(ss[0].ID, startID, c.max)
			if int64(len(ns)) != c.length || end != c.end {
				return false, fmt.Errorf("GetServerSockets(%d) = %+v (len of which: %d), end: %+v, want len(GetServerSockets(%d)) = %d, end: %+v", c.start, ns, len(ns), end, c.start, c.length, c.end)
			}

			svrID = ss[0].ID
			return true, nil
		}); err != nil {
			t.Fatal(err)
		}

		for _, cc := range ccs {
			cc.Close()
		}

		if err := verifyResultWithDelay(func() (bool, error) {
			ns, _ := channelz.GetServerSockets(svrID, c.start, c.max)
			if len(ns) != 0 {
				return false, fmt.Errorf("there should be %d normal sockets not %d", 0, len(ns))
			}
			return true, nil
		}); err != nil {
			t.Fatal(err)
		}
		te.tearDown()
	}
}

func (s) TestCZServerListenSocketDeletion(t *testing.T) {
	czCleanup := channelz.NewChannelzStorage()
	defer czCleanupWrapper(czCleanup, t)
	s := grpc.NewServer()
	lis, err := net.Listen("tcp", "localhost:0")
	if err != nil {
		t.Fatalf("failed to listen: %v", err)
	}
	go s.Serve(lis)
	if err := verifyResultWithDelay(func() (bool, error) {
		ss, _ := channelz.GetServers(0, 0)
		if len(ss) != 1 {
			return false, fmt.Errorf("there should only be one server, not %d", len(ss))
		}
		if len(ss[0].ListenSockets) != 1 {
			return false, fmt.Errorf("there should only be one server listen socket, not %d", len(ss[0].ListenSockets))
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}

	lis.Close()
	if err := verifyResultWithDelay(func() (bool, error) {
		ss, _ := channelz.GetServers(0, 0)
		if len(ss) != 1 {
			return false, fmt.Errorf("there should be 1 server, not %d", len(ss))
		}
		if len(ss[0].ListenSockets) != 0 {
			return false, fmt.Errorf("there should only be %d server listen socket, not %d", 0, len(ss[0].ListenSockets))
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}
	s.Stop()
}

type dummyChannel struct{}

func (d *dummyChannel) ChannelzMetric() *channelz.ChannelInternalMetric {
	return &channelz.ChannelInternalMetric{}
}

type dummySocket struct{}

func (d *dummySocket) ChannelzMetric() *channelz.SocketInternalMetric {
	return &channelz.SocketInternalMetric{}
}

func (s) TestCZRecusivelyDeletionOfEntry(t *testing.T) {
	//          +--+TopChan+---+
	//          |              |
	//          v              v
	//   +-+SubChan1+--+    SubChan2
	//   |             |
	//   v             v
	// Socket1      Socket2
	czCleanup := channelz.NewChannelzStorage()
	defer czCleanupWrapper(czCleanup, t)
	topChanID := channelz.RegisterChannel(&dummyChannel{}, 0, "")
	subChanID1 := channelz.RegisterSubChannel(&dummyChannel{}, topChanID, "")
	subChanID2 := channelz.RegisterSubChannel(&dummyChannel{}, topChanID, "")
	sktID1 := channelz.RegisterNormalSocket(&dummySocket{}, subChanID1, "")
	sktID2 := channelz.RegisterNormalSocket(&dummySocket{}, subChanID1, "")

	tcs, _ := channelz.GetTopChannels(0, 0)
	if tcs == nil || len(tcs) != 1 {
		t.Fatalf("There should be one TopChannel entry")
	}
	if len(tcs[0].SubChans) != 2 {
		t.Fatalf("There should be two SubChannel entries")
	}
	sc := channelz.GetSubChannel(subChanID1)
	if sc == nil || len(sc.Sockets) != 2 {
		t.Fatalf("There should be two Socket entries")
	}

	channelz.RemoveEntry(topChanID)
	tcs, _ = channelz.GetTopChannels(0, 0)
	if tcs == nil || len(tcs) != 1 {
		t.Fatalf("There should be one TopChannel entry")
	}

	channelz.RemoveEntry(subChanID1)
	channelz.RemoveEntry(subChanID2)
	tcs, _ = channelz.GetTopChannels(0, 0)
	if tcs == nil || len(tcs) != 1 {
		t.Fatalf("There should be one TopChannel entry")
	}
	if len(tcs[0].SubChans) != 1 {
		t.Fatalf("There should be one SubChannel entry")
	}

	channelz.RemoveEntry(sktID1)
	channelz.RemoveEntry(sktID2)
	tcs, _ = channelz.GetTopChannels(0, 0)
	if tcs != nil {
		t.Fatalf("There should be no TopChannel entry")
	}
}
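
// RemoveEntry on a parent that still has children only marks the parent for
// deletion; the entry actually disappears once its last child is removed,
// which is what the test above exercises. A minimal sketch of that behavior
// (hypothetical helper, not called by any test; calling it would mutate the
// global channelz state):
func exampleDeferredTopChannelDeletion() bool {
	top := channelz.RegisterChannel(&dummyChannel{}, 0, "")
	sub := channelz.RegisterSubChannel(&dummyChannel{}, top, "")
	channelz.RemoveEntry(top) // top channel is still listed: sub references it
	channelz.RemoveEntry(sub) // now the top channel can actually be deleted
	tcs, _ := channelz.GetTopChannels(0, 0)
	return tcs == nil
}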

func (s) TestCZChannelMetrics(t *testing.T) {
	czCleanup := channelz.NewChannelzStorage()
	defer czCleanupWrapper(czCleanup, t)
	e := tcpClearRREnv
	num := 3 // number of backends
	te := newTest(t, e)
	te.maxClientSendMsgSize = newInt(8)
	var svrAddrs []resolver.Address
	te.startServers(&testServer{security: e.security}, num)
	r := manual.NewBuilderWithScheme("whatever")
	for _, a := range te.srvAddrs {
		svrAddrs = append(svrAddrs, resolver.Address{Addr: a})
	}
	r.InitialState(resolver.State{Addresses: svrAddrs})
	te.resolverScheme = r.Scheme()
	cc := te.clientConn(grpc.WithResolvers(r))
	defer te.tearDown()
	tc := testpb.NewTestServiceClient(cc)
	if _, err := tc.EmptyCall(context.Background(), &testpb.Empty{}); err != nil {
		t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
	}

	const smallSize = 1
	const largeSize = 8

	largePayload, err := newPayload(testpb.PayloadType_COMPRESSABLE, largeSize)
	if err != nil {
		t.Fatal(err)
	}
	req := &testpb.SimpleRequest{
		ResponseType: testpb.PayloadType_COMPRESSABLE,
		ResponseSize: int32(smallSize),
		Payload:      largePayload,
	}

	if _, err := tc.UnaryCall(context.Background(), req); err == nil || status.Code(err) != codes.ResourceExhausted {
		t.Fatalf("TestService/UnaryCall(_, _) = _, %v, want _, error code: %s", err, codes.ResourceExhausted)
	}

	stream, err := tc.FullDuplexCall(context.Background())
	if err != nil {
		t.Fatalf("%v.FullDuplexCall(_) = _, %v, want <nil>", tc, err)
	}
	defer stream.CloseSend()
	// Here, we just wait for all sockets to be up. In the future, if we implement
	// IDLE, we may need to make several rpc calls to create the sockets.
	if err := verifyResultWithDelay(func() (bool, error) {
		tcs, _ := channelz.GetTopChannels(0, 0)
		if len(tcs) != 1 {
			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
		}
		if len(tcs[0].SubChans) != num {
			return false, fmt.Errorf("there should be %d subchannel not %d", num, len(tcs[0].SubChans))
		}
		var cst, csu, cf int64
		for k := range tcs[0].SubChans {
			sc := channelz.GetSubChannel(k)
			if sc == nil {
				return false, fmt.Errorf("got <nil> subchannel")
			}
			cst += sc.ChannelData.CallsStarted
			csu += sc.ChannelData.CallsSucceeded
			cf += sc.ChannelData.CallsFailed
		}
		if cst != 3 {
			return false, fmt.Errorf("there should be 3 CallsStarted not %d", cst)
		}
		if csu != 1 {
			return false, fmt.Errorf("there should be 1 CallsSucceeded not %d", csu)
		}
		if cf != 1 {
			return false, fmt.Errorf("there should be 1 CallsFailed not %d", cf)
		}
		if tcs[0].ChannelData.CallsStarted != 3 {
			return false, fmt.Errorf("there should be 3 CallsStarted not %d", tcs[0].ChannelData.CallsStarted)
		}
		if tcs[0].ChannelData.CallsSucceeded != 1 {
			return false, fmt.Errorf("there should be 1 CallsSucceeded not %d", tcs[0].ChannelData.CallsSucceeded)
		}
		if tcs[0].ChannelData.CallsFailed != 1 {
			return false, fmt.Errorf("there should be 1 CallsFailed not %d", tcs[0].ChannelData.CallsFailed)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}
}

func (s) TestCZServerMetrics(t *testing.T) {
	czCleanup := channelz.NewChannelzStorage()
	defer czCleanupWrapper(czCleanup, t)
	e := tcpClearRREnv
	te := newTest(t, e)
	te.maxServerReceiveMsgSize = newInt(8)
	te.startServer(&testServer{security: e.security})
	defer te.tearDown()
	cc := te.clientConn()
	tc := testpb.NewTestServiceClient(cc)
	if _, err := tc.EmptyCall(context.Background(), &testpb.Empty{}); err != nil {
		t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
	}

	const smallSize = 1
	const largeSize = 8

	largePayload, err := newPayload(testpb.PayloadType_COMPRESSABLE, largeSize)
	if err != nil {
		t.Fatal(err)
	}
	req := &testpb.SimpleRequest{
		ResponseType: testpb.PayloadType_COMPRESSABLE,
		ResponseSize: int32(smallSize),
		Payload:      largePayload,
	}
	if _, err := tc.UnaryCall(context.Background(), req); err == nil || status.Code(err) != codes.ResourceExhausted {
		t.Fatalf("TestService/UnaryCall(_, _) = _, %v, want _, error code: %s", err, codes.ResourceExhausted)
	}

	stream, err := tc.FullDuplexCall(context.Background())
	if err != nil {
		t.Fatalf("%v.FullDuplexCall(_) = _, %v, want <nil>", tc, err)
	}
	defer stream.CloseSend()

	if err := verifyResultWithDelay(func() (bool, error) {
		ss, _ := channelz.GetServers(0, 0)
		if len(ss) != 1 {
			return false, fmt.Errorf("there should only be one server, not %d", len(ss))
		}
		if ss[0].ServerData.CallsStarted != 3 {
			return false, fmt.Errorf("there should be 3 CallsStarted not %d", ss[0].ServerData.CallsStarted)
		}
		if ss[0].ServerData.CallsSucceeded != 1 {
			return false, fmt.Errorf("there should be 1 CallsSucceeded not %d", ss[0].ServerData.CallsSucceeded)
		}
		if ss[0].ServerData.CallsFailed != 1 {
			return false, fmt.Errorf("there should be 1 CallsFailed not %d", ss[0].ServerData.CallsFailed)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}
}

type testServiceClientWrapper struct {
	testpb.TestServiceClient
	mu             sync.RWMutex
	streamsCreated int
}

func (t *testServiceClientWrapper) getCurrentStreamID() uint32 {
	t.mu.RLock()
	defer t.mu.RUnlock()
	return uint32(2*t.streamsCreated - 1)
}

func (t *testServiceClientWrapper) EmptyCall(ctx context.Context, in *testpb.Empty, opts ...grpc.CallOption) (*testpb.Empty, error) {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.streamsCreated++
	return t.TestServiceClient.EmptyCall(ctx, in, opts...)
}

func (t *testServiceClientWrapper) UnaryCall(ctx context.Context, in *testpb.SimpleRequest, opts ...grpc.CallOption) (*testpb.SimpleResponse, error) {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.streamsCreated++
	return t.TestServiceClient.UnaryCall(ctx, in, opts...)
}

func (t *testServiceClientWrapper) StreamingOutputCall(ctx context.Context, in *testpb.StreamingOutputCallRequest, opts ...grpc.CallOption) (testpb.TestService_StreamingOutputCallClient, error) {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.streamsCreated++
	return t.TestServiceClient.StreamingOutputCall(ctx, in, opts...)
}

func (t *testServiceClientWrapper) StreamingInputCall(ctx context.Context, opts ...grpc.CallOption) (testpb.TestService_StreamingInputCallClient, error) {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.streamsCreated++
	return t.TestServiceClient.StreamingInputCall(ctx, opts...)
}

func (t *testServiceClientWrapper) FullDuplexCall(ctx context.Context, opts ...grpc.CallOption) (testpb.TestService_FullDuplexCallClient, error) {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.streamsCreated++
	return t.TestServiceClient.FullDuplexCall(ctx, opts...)
}

func (t *testServiceClientWrapper) HalfDuplexCall(ctx context.Context, opts ...grpc.CallOption) (testpb.TestService_HalfDuplexCallClient, error) {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.streamsCreated++
	return t.TestServiceClient.HalfDuplexCall(ctx, opts...)
}

func doSuccessfulUnaryCall(tc testpb.TestServiceClient, t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()
	if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil {
		t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
	}
}

func doStreamingInputCallWithLargePayload(tc testpb.TestServiceClient, t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()
	s, err := tc.StreamingInputCall(ctx)
	if err != nil {
		t.Fatalf("TestService/StreamingInputCall(_) = _, %v, want <nil>", err)
	}
	payload, err := newPayload(testpb.PayloadType_COMPRESSABLE, 10000)
	if err != nil {
		t.Fatal(err)
	}
	s.Send(&testpb.StreamingInputCallRequest{Payload: payload})
}

func doServerSideFailedUnaryCall(tc testpb.TestServiceClient, t *testing.T) {
	const smallSize = 1
	const largeSize = 2000

	largePayload, err := newPayload(testpb.PayloadType_COMPRESSABLE, largeSize)
	if err != nil {
		t.Fatal(err)
	}
	req := &testpb.SimpleRequest{
		ResponseType: testpb.PayloadType_COMPRESSABLE,
		ResponseSize: int32(smallSize),
		Payload:      largePayload,
	}
	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()
	if _, err := tc.UnaryCall(ctx, req); err == nil || status.Code(err) != codes.ResourceExhausted {
		t.Fatalf("TestService/UnaryCall(_, _) = _, %v, want _, error code: %s", err, codes.ResourceExhausted)
	}
}

func doClientSideInitiatedFailedStream(tc testpb.TestServiceClient, t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	stream, err := tc.FullDuplexCall(ctx)
	if err != nil {
		t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err)
	}

	const smallSize = 1
	smallPayload, err := newPayload(testpb.PayloadType_COMPRESSABLE, smallSize)
	if err != nil {
		t.Fatal(err)
	}

	sreq := &testpb.StreamingOutputCallRequest{
		ResponseType: testpb.PayloadType_COMPRESSABLE,
		ResponseParameters: []*testpb.ResponseParameters{
			{Size: smallSize},
		},
		Payload: smallPayload,
	}

	if err := stream.Send(sreq); err != nil {
		t.Fatalf("%v.Send(%v) = %v, want <nil>", stream, sreq, err)
	}
	if _, err := stream.Recv(); err != nil {
		t.Fatalf("%v.Recv() = %v, want <nil>", stream, err)
	}
	// By canceling the call, the client will send rst_stream to end the call, and
	// the stream will fail as a result.
	cancel()
}
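
// getCurrentStreamID above maps the n-th client-created stream to HTTP/2
// stream ID 2n-1, because client-initiated streams use odd IDs starting at 1.
// A trivial restatement of that mapping (hypothetical helper, not used by the
// tests):
func exampleNthClientStreamID(n int) uint32 {
	return uint32(2*n - 1)
}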

// This func is to be used to test client side counting of failed streams.
func doServerSideInitiatedFailedStreamWithRSTStream(tc testpb.TestServiceClient, t *testing.T, l *listenerWrapper) {
	stream, err := tc.FullDuplexCall(context.Background())
	if err != nil {
		t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err)
	}

	const smallSize = 1
	smallPayload, err := newPayload(testpb.PayloadType_COMPRESSABLE, smallSize)
	if err != nil {
		t.Fatal(err)
	}

	sreq := &testpb.StreamingOutputCallRequest{
		ResponseType: testpb.PayloadType_COMPRESSABLE,
		ResponseParameters: []*testpb.ResponseParameters{
			{Size: smallSize},
		},
		Payload: smallPayload,
	}

	if err := stream.Send(sreq); err != nil {
		t.Fatalf("%v.Send(%v) = %v, want <nil>", stream, sreq, err)
	}
	if _, err := stream.Recv(); err != nil {
		t.Fatalf("%v.Recv() = %v, want <nil>", stream, err)
	}

	rcw := l.getLastConn()

	if rcw != nil {
		rcw.writeRSTStream(tc.(*testServiceClientWrapper).getCurrentStreamID(), http2.ErrCodeCancel)
	}
	if _, err := stream.Recv(); err == nil {
		t.Fatalf("%v.Recv() = %v, want <non-nil>", stream, err)
	}
}

// This func is to be used to test client side counting of failed streams.
func doServerSideInitiatedFailedStreamWithGoAway(tc testpb.TestServiceClient, t *testing.T, l *listenerWrapper) {
	// This call is just to keep the transport from shutting down (socket will be deleted
	// in this case, and we will not be able to get metrics).
	s, err := tc.FullDuplexCall(context.Background())
	if err != nil {
		t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err)
	}
	if err := s.Send(&testpb.StreamingOutputCallRequest{ResponseParameters: []*testpb.ResponseParameters{
		{
			Size: 1,
		},
	}}); err != nil {
		t.Fatalf("s.Send() failed with error: %v", err)
	}
	if _, err := s.Recv(); err != nil {
		t.Fatalf("s.Recv() failed with error: %v", err)
	}

	s, err = tc.FullDuplexCall(context.Background())
	if err != nil {
		t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err)
	}
	if err := s.Send(&testpb.StreamingOutputCallRequest{ResponseParameters: []*testpb.ResponseParameters{
		{
			Size: 1,
		},
	}}); err != nil {
		t.Fatalf("s.Send() failed with error: %v", err)
	}
	if _, err := s.Recv(); err != nil {
		t.Fatalf("s.Recv() failed with error: %v", err)
	}

	rcw := l.getLastConn()
	if rcw != nil {
		rcw.writeGoAway(tc.(*testServiceClientWrapper).getCurrentStreamID()-2, http2.ErrCodeCancel, []byte{})
	}
	if _, err := s.Recv(); err == nil {
		t.Fatalf("%v.Recv() = %v, want <non-nil>", s, err)
	}
}

func doIdleCallToInvokeKeepAlive(tc testpb.TestServiceClient, t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	_, err := tc.FullDuplexCall(ctx)
	if err != nil {
		t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err)
	}
	// Allow for at least 2 keepalives (1s per ping interval)
	time.Sleep(4 * time.Second)
	cancel()
}

func (s) TestCZClientSocketMetricsStreamsAndMessagesCount(t *testing.T) {
	czCleanup := channelz.NewChannelzStorage()
	defer czCleanupWrapper(czCleanup, t)
	e := tcpClearRREnv
	te := newTest(t, e)
	te.maxServerReceiveMsgSize = newInt(20)
	te.maxClientReceiveMsgSize = newInt(20)
	rcw := te.startServerWithConnControl(&testServer{security: e.security})
	defer te.tearDown()
	cc := te.clientConn()
	tc := &testServiceClientWrapper{TestServiceClient: testpb.NewTestServiceClient(cc)}

	doSuccessfulUnaryCall(tc, t)
	var scID, skID int64
	if err := verifyResultWithDelay(func() (bool, error) {
		tchan, _ := channelz.GetTopChannels(0, 0)
		if len(tchan) != 1 {
			return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan))
		}
		if len(tchan[0].SubChans) != 1 {
			return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans))
		}

		for scID = range tchan[0].SubChans {
			break
		}
		sc := channelz.GetSubChannel(scID)
		if sc == nil {
			return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", scID)
		}
		if len(sc.Sockets) != 1 {
			return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets))
		}
		for skID = range sc.Sockets {
			break
		}
		skt := channelz.GetSocket(skID)
		sktData := skt.SocketData
		if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 1 || sktData.MessagesSent != 1 || sktData.MessagesReceived != 1 {
			return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, MessagesSent, MessagesReceived) = (1, 1, 1, 1), got (%d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.MessagesSent, sktData.MessagesReceived)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}

	doServerSideFailedUnaryCall(tc, t)
	if err := verifyResultWithDelay(func() (bool, error) {
		skt := channelz.GetSocket(skID)
		sktData := skt.SocketData
		if sktData.StreamsStarted != 2 || sktData.StreamsSucceeded != 2 || sktData.MessagesSent != 2 || sktData.MessagesReceived != 1 {
			return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, MessagesSent, MessagesReceived) = (2, 2, 2, 1), got (%d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.MessagesSent, sktData.MessagesReceived)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}

	doClientSideInitiatedFailedStream(tc, t)
	if err := verifyResultWithDelay(func() (bool, error) {
		skt := channelz.GetSocket(skID)
		sktData := skt.SocketData
		if sktData.StreamsStarted != 3 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 1 || sktData.MessagesSent != 3 || sktData.MessagesReceived != 2 {
			return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (3, 2, 1, 3, 2), got (%d, %d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}

	doServerSideInitiatedFailedStreamWithRSTStream(tc, t, rcw)
	if err := verifyResultWithDelay(func() (bool, error) {
		skt := channelz.GetSocket(skID)
		sktData := skt.SocketData
		if sktData.StreamsStarted != 4 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 2 || sktData.MessagesSent != 4 || sktData.MessagesReceived != 3 {
			return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (4, 2, 2, 4, 3), got (%d, %d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}

	doServerSideInitiatedFailedStreamWithGoAway(tc, t, rcw)
	if err := verifyResultWithDelay(func() (bool, error) {
		skt := channelz.GetSocket(skID)
		sktData := skt.SocketData
		if sktData.StreamsStarted != 6 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 3 || sktData.MessagesSent != 6 || sktData.MessagesReceived != 5 {
			return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (6, 2, 3, 6, 5), got (%d, %d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}
}

// This test completes TestCZClientSocketMetricsStreamsAndMessagesCount and
// TestCZServerSocketMetricsStreamsAndMessagesCount by adding the case of the
// server sending RST_STREAM to the client due to a client side flow control
// violation. It is separated from the other cases due to setup
// incompatibility, i.e. a max receive size violation would mask the flow
// control violation.
func (s) TestCZClientAndServerSocketMetricsStreamsCountFlowControlRSTStream(t *testing.T) {
	czCleanup := channelz.NewChannelzStorage()
	defer czCleanupWrapper(czCleanup, t)
	e := tcpClearRREnv
	te := newTest(t, e)
	te.serverInitialWindowSize = 65536
	// Avoid overflowing connection level flow control window, which will lead to
	// transport being closed.
	te.serverInitialConnWindowSize = 65536 * 2
	ts := &stubserver.StubServer{FullDuplexCallF: func(stream testpb.TestService_FullDuplexCallServer) error {
		stream.Send(&testpb.StreamingOutputCallResponse{})
		<-stream.Context().Done()
		return status.Errorf(codes.DeadlineExceeded, "deadline exceeded or cancelled")
	}}
	te.startServer(ts)
	defer te.tearDown()
	cc, dw := te.clientConnWithConnControl()
	tc := &testServiceClientWrapper{TestServiceClient: testpb.NewTestServiceClient(cc)}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	stream, err := tc.FullDuplexCall(ctx)
	if err != nil {
		t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err)
	}
	if _, err := stream.Recv(); err != nil {
		t.Fatalf("stream.Recv() = %v, want nil", err)
	}
	go func() {
		payload := make([]byte, 16384)
		for i := 0; i < 6; i++ {
			dw.getRawConnWrapper().writeRawFrame(http2.FrameData, 0, tc.getCurrentStreamID(), payload)
		}
	}()
	if _, err := stream.Recv(); status.Code(err) != codes.ResourceExhausted {
		t.Fatalf("stream.Recv() = %v, want error code: %v", err, codes.ResourceExhausted)
	}
	cancel()

	if err := verifyResultWithDelay(func() (bool, error) {
		tchan, _ := channelz.GetTopChannels(0, 0)
		if len(tchan) != 1 {
			return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan))
		}
		if len(tchan[0].SubChans) != 1 {
			return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans))
		}
		var id int64
		for id = range tchan[0].SubChans {
			break
		}
		sc := channelz.GetSubChannel(id)
		if sc == nil {
			return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id)
		}
		if len(sc.Sockets) != 1 {
			return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets))
		}
		for id = range sc.Sockets {
			break
		}
		skt := channelz.GetSocket(id)
		sktData := skt.SocketData
		if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 0 || sktData.StreamsFailed != 1 {
			return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, StreamsFailed) = (1, 0, 1), got (%d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed)
		}
		ss, _ := channelz.GetServers(0, 0)
		if len(ss) != 1 {
			return false, fmt.Errorf("there should only be one server, not %d", len(ss))
		}

		ns, _ := channelz.GetServerSockets(ss[0].ID, 0, 0)
		if len(ns) != 1 {
			return false, fmt.Errorf("there should be one server normal socket, not %d", len(ns))
		}
		sktData = ns[0].SocketData
		if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 0 || sktData.StreamsFailed != 1 {
			return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, StreamsFailed) = (1, 0, 1), got (%d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}
}
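
// In the flow control violation test above, the client writes 6 raw DATA
// frames of 16384 bytes each on the current stream, i.e. 6*16384 = 98304
// bytes, well past the 65536-byte per-stream window the server advertised, so
// the server resets the stream. A quick sketch of that arithmetic (names are
// illustrative only; figures are taken from the test body):
const (
	violationFrameSize  = 16384
	violationFrameCount = 6
	violationBytes      = violationFrameCount * violationFrameSize // 98304 > 65536
)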

func (s) TestCZClientAndServerSocketMetricsFlowControl(t *testing.T) {
	czCleanup := channelz.NewChannelzStorage()
	defer czCleanupWrapper(czCleanup, t)
	e := tcpClearRREnv
	te := newTest(t, e)
	// disable BDP
	te.serverInitialWindowSize = 65536
	te.serverInitialConnWindowSize = 65536
	te.clientInitialWindowSize = 65536
	te.clientInitialConnWindowSize = 65536
	te.startServer(&testServer{security: e.security})
	defer te.tearDown()
	cc := te.clientConn()
	tc := testpb.NewTestServiceClient(cc)

	for i := 0; i < 10; i++ {
		doSuccessfulUnaryCall(tc, t)
	}

	var cliSktID, svrSktID int64
	if err := verifyResultWithDelay(func() (bool, error) {
		tchan, _ := channelz.GetTopChannels(0, 0)
		if len(tchan) != 1 {
			return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan))
		}
		if len(tchan[0].SubChans) != 1 {
			return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans))
		}
		var id int64
		for id = range tchan[0].SubChans {
			break
		}
		sc := channelz.GetSubChannel(id)
		if sc == nil {
			return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id)
		}
		if len(sc.Sockets) != 1 {
			return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets))
		}
		for id = range sc.Sockets {
			break
		}
		skt := channelz.GetSocket(id)
		sktData := skt.SocketData
		// 65536 - 5 (Length-Prefixed-Message size) * 10 = 65486
		if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 65486 {
			return false, fmt.Errorf("client: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65486, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow)
		}
		ss, _ := channelz.GetServers(0, 0)
		if len(ss) != 1 {
			return false, fmt.Errorf("there should only be one server, not %d", len(ss))
		}
		ns, _ := channelz.GetServerSockets(ss[0].ID, 0, 0)
		sktData = ns[0].SocketData
		if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 65486 {
			return false, fmt.Errorf("server: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65486, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow)
		}
		cliSktID, svrSktID = id, ss[0].ID
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}

	doStreamingInputCallWithLargePayload(tc, t)

	if err := verifyResultWithDelay(func() (bool, error) {
		skt := channelz.GetSocket(cliSktID)
		sktData := skt.SocketData
		// Local: 65536 - 5 (Length-Prefixed-Message size) * 10 = 65486
		// Remote: 65536 - 5 (Length-Prefixed-Message size) * 10 - 10011 = 55475
		if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 55475 {
			return false, fmt.Errorf("client: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65486, 55475), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow)
		}
		ss, _ := channelz.GetServers(0, 0)
		if len(ss) != 1 {
			return false, fmt.Errorf("there should only be one server, not %d", len(ss))
		}
		ns, _ := channelz.GetServerSockets(svrSktID, 0, 0)
		sktData = ns[0].SocketData
		if sktData.LocalFlowControlWindow != 55475 || sktData.RemoteFlowControlWindow != 65486 {
			return false, fmt.Errorf("server: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (55475, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}

	// triggers transport flow control window update on server side, since unacked
	// bytes should be larger than limit now. i.e. 50 + 20022 > 65536/4.
	doStreamingInputCallWithLargePayload(tc, t)
	if err := verifyResultWithDelay(func() (bool, error) {
		skt := channelz.GetSocket(cliSktID)
		sktData := skt.SocketData
		// Local: 65536 - 5 (Length-Prefixed-Message size) * 10 = 65486
		// Remote: 65536
		if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 65536 {
			return false, fmt.Errorf("client: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65486, 65536), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow)
		}
		ss, _ := channelz.GetServers(0, 0)
		if len(ss) != 1 {
			return false, fmt.Errorf("there should only be one server, not %d", len(ss))
		}
		ns, _ := channelz.GetServerSockets(svrSktID, 0, 0)
		sktData = ns[0].SocketData
		if sktData.LocalFlowControlWindow != 65536 || sktData.RemoteFlowControlWindow != 65486 {
			return false, fmt.Errorf("server: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65536, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}
}
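
// The window numbers asserted above follow from simple bookkeeping: each of
// the 10 empty unary calls consumes a 5-byte length-prefixed message from the
// 65536-byte initial window, and the large streaming request consumes a
// further 10011 bytes (figure taken from the test's own comments) until a
// window update resets the remote side to 65536. A small sketch of that
// arithmetic (names are illustrative only):
const (
	initialWindowSketch      = 65536
	emptyMessageOnWireSketch = 5                                                   // 5-byte length prefix, empty body
	afterTenEmptyCallsSketch = initialWindowSketch - 10*emptyMessageOnWireSketch   // 65486
	largeRequestOnWireSketch = 10011                                               // per the test's comment
	afterLargeRequestSketch  = afterTenEmptyCallsSketch - largeRequestOnWireSketch // 55475
)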

func (s) TestCZClientSocketMetricsKeepAlive(t *testing.T) {
	czCleanup := channelz.NewChannelzStorage()
	defer czCleanupWrapper(czCleanup, t)
	defer func(t time.Duration) { internal.KeepaliveMinPingTime = t }(internal.KeepaliveMinPingTime)
	internal.KeepaliveMinPingTime = time.Second
	e := tcpClearRREnv
	te := newTest(t, e)
	te.customDialOptions = append(te.customDialOptions, grpc.WithKeepaliveParams(
		keepalive.ClientParameters{
			Time:                time.Second,
			Timeout:             500 * time.Millisecond,
			PermitWithoutStream: true,
		}))
	te.customServerOptions = append(te.customServerOptions, grpc.KeepaliveEnforcementPolicy(
		keepalive.EnforcementPolicy{
			MinTime:             500 * time.Millisecond,
			PermitWithoutStream: true,
		}))
	te.startServer(&testServer{security: e.security})
	te.clientConn() // Dial the server
	defer te.tearDown()
	if err := verifyResultWithDelay(func() (bool, error) {
		tchan, _ := channelz.GetTopChannels(0, 0)
		if len(tchan) != 1 {
			return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan))
		}
		if len(tchan[0].SubChans) != 1 {
			return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans))
		}
		var id int64
		for id = range tchan[0].SubChans {
			break
		}
		sc := channelz.GetSubChannel(id)
		if sc == nil {
			return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id)
		}
		if len(sc.Sockets) != 1 {
			return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets))
		}
		for id = range sc.Sockets {
			break
		}
		skt := channelz.GetSocket(id)
		if skt.SocketData.KeepAlivesSent != 2 {
			return false, fmt.Errorf("there should be 2 KeepAlives sent, not %d", skt.SocketData.KeepAlivesSent)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}
}

func (s) TestCZServerSocketMetricsStreamsAndMessagesCount(t *testing.T) {
	czCleanup := channelz.NewChannelzStorage()
	defer czCleanupWrapper(czCleanup, t)
	e := tcpClearRREnv
	te := newTest(t, e)
	te.maxServerReceiveMsgSize = newInt(20)
	te.maxClientReceiveMsgSize = newInt(20)
	te.startServer(&testServer{security: e.security})
	defer te.tearDown()
	cc, _ := te.clientConnWithConnControl()
	tc := &testServiceClientWrapper{TestServiceClient: testpb.NewTestServiceClient(cc)}

	var svrID int64
	if err := verifyResultWithDelay(func() (bool, error) {
		ss, _ := channelz.GetServers(0, 0)
		if len(ss) != 1 {
			return false, fmt.Errorf("there should only be one server, not %d", len(ss))
		}
		svrID = ss[0].ID
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}

	doSuccessfulUnaryCall(tc, t)
	if err := verifyResultWithDelay(func() (bool, error) {
		ns, _ := channelz.GetServerSockets(svrID, 0, 0)
		sktData := ns[0].SocketData
		if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 1 || sktData.StreamsFailed != 0 || sktData.MessagesSent != 1 || sktData.MessagesReceived != 1 {
			return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (1, 1, 0, 1, 1), got (%d, %d, %d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}

	doServerSideFailedUnaryCall(tc, t)
	if err := verifyResultWithDelay(func() (bool, error) {
		ns, _ := channelz.GetServerSockets(svrID, 0, 0)
		sktData := ns[0].SocketData
		if sktData.StreamsStarted != 2 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 0 || sktData.MessagesSent != 1 || sktData.MessagesReceived != 1 {
			return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (2, 2, 0, 1, 1), got (%d, %d, %d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}

	doClientSideInitiatedFailedStream(tc, t)
	if err := verifyResultWithDelay(func() (bool, error) {
		ns, _ := channelz.GetServerSockets(svrID, 0, 0)
		sktData := ns[0].SocketData
		if sktData.StreamsStarted != 3 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 1 || sktData.MessagesSent != 2 || sktData.MessagesReceived != 2 {
			return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (3, 2, 1, 2, 2), got (%d, %d, %d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}
}

func (s) TestCZServerSocketMetricsKeepAlive(t *testing.T) {
	czCleanup := channelz.NewChannelzStorage()
	defer czCleanupWrapper(czCleanup, t)
	e := tcpClearRREnv
	te := newTest(t, e)
	// We set up the server keepalive parameters to send one keepalive every
	// second, and verify that the actual number of keepalives is very close to
	// the number of seconds elapsed in the test. We had a bug wherein the
	// server was sending one keepalive every [Time+Timeout] instead of every
	// [Time] period, and since Timeout is configured to a low value here, we
	// should be able to verify that the fix works with the above mentioned
	// logic.
	kpOption := grpc.KeepaliveParams(keepalive.ServerParameters{
		Time:    time.Second,
		Timeout: 100 * time.Millisecond,
	})
	te.customServerOptions = append(te.customServerOptions, kpOption)
	te.startServer(&testServer{security: e.security})
	defer te.tearDown()
	cc := te.clientConn()
	tc := testpb.NewTestServiceClient(cc)
	start := time.Now()
	doIdleCallToInvokeKeepAlive(tc, t)

	if err := verifyResultWithDelay(func() (bool, error) {
		ss, _ := channelz.GetServers(0, 0)
		if len(ss) != 1 {
			return false, fmt.Errorf("there should be one server, not %d", len(ss))
		}
		ns, _ := channelz.GetServerSockets(ss[0].ID, 0, 0)
		if len(ns) != 1 {
			return false, fmt.Errorf("there should be one server normal socket, not %d", len(ns))
		}
		wantKeepalivesCount := int64(time.Since(start).Seconds()) - 1
		if gotKeepalivesCount := ns[0].SocketData.KeepAlivesSent; gotKeepalivesCount != wantKeepalivesCount {
			return false, fmt.Errorf("got keepalivesCount: %v, want keepalivesCount: %v", gotKeepalivesCount, wantKeepalivesCount)
		}
		return true, nil
	}); err != nil {
		t.Fatal(err)
	}
}
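
// With keepalive.ServerParameters{Time: time.Second}, the server should send
// roughly one keepalive ping per second of idle time, so the test above
// expects about (elapsed seconds - 1) pings. A minimal restatement of that
// expectation (hypothetical helper, mirroring the calculation in the test):
func exampleWantKeepalives(elapsed time.Duration) int64 {
	return int64(elapsed.Seconds()) - 1
}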
1335 "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384", 1336 "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384", 1337 "TLS_FALLBACK_SCSV", 1338 "TLS_RSA_WITH_AES_128_CBC_SHA256", 1339 "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256", 1340 "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256", 1341 "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305", 1342 "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305", 1343 "TLS_AES_128_GCM_SHA256", 1344 "TLS_AES_256_GCM_SHA384", 1345 "TLS_CHACHA20_POLY1305_SHA256", 1346 } 1347 1348 func (s) TestCZSocketGetSecurityValueTLS(t *testing.T) { 1349 czCleanup := channelz.NewChannelzStorage() 1350 defer czCleanupWrapper(czCleanup, t) 1351 e := tcpTLSRREnv 1352 te := newTest(t, e) 1353 te.startServer(&testServer{security: e.security}) 1354 defer te.tearDown() 1355 te.clientConn() 1356 if err := verifyResultWithDelay(func() (bool, error) { 1357 tchan, _ := channelz.GetTopChannels(0, 0) 1358 if len(tchan) != 1 { 1359 return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan)) 1360 } 1361 if len(tchan[0].SubChans) != 1 { 1362 return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans)) 1363 } 1364 var id int64 1365 for id = range tchan[0].SubChans { 1366 break 1367 } 1368 sc := channelz.GetSubChannel(id) 1369 if sc == nil { 1370 return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id) 1371 } 1372 if len(sc.Sockets) != 1 { 1373 return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets)) 1374 } 1375 for id = range sc.Sockets { 1376 break 1377 } 1378 skt := channelz.GetSocket(id) 1379 cert, _ := tls.LoadX509KeyPair(testdata.Path("x509/server1_cert.pem"), testdata.Path("x509/server1_key.pem")) 1380 securityVal, ok := skt.SocketData.Security.(*credentials.TLSChannelzSecurityValue) 1381 if !ok { 1382 return false, fmt.Errorf("the SocketData.Security is of type: %T, want: *credentials.TLSChannelzSecurityValue", skt.SocketData.Security) 1383 } 1384 if !reflect.DeepEqual(securityVal.RemoteCertificate, cert.Certificate[0]) { 1385 return false, fmt.Errorf("SocketData.Security.RemoteCertificate got: %v, want: %v", securityVal.RemoteCertificate, cert.Certificate[0]) 1386 } 1387 for _, v := range cipherSuites { 1388 if v == securityVal.StandardName { 1389 return true, nil 1390 } 1391 } 1392 return false, fmt.Errorf("SocketData.Security.StandardName got: %v, want it to be one of %v", securityVal.StandardName, cipherSuites) 1393 }); err != nil { 1394 t.Fatal(err) 1395 } 1396 } 1397 1398 func (s) TestCZChannelTraceCreationDeletion(t *testing.T) { 1399 czCleanup := channelz.NewChannelzStorage() 1400 defer czCleanupWrapper(czCleanup, t) 1401 e := tcpClearRREnv 1402 // avoid calling API to set balancer type, which will void service config's change of balancer. 
1403 e.balancer = "" 1404 te := newTest(t, e) 1405 r := manual.NewBuilderWithScheme("whatever") 1406 resolvedAddrs := []resolver.Address{{Addr: "127.0.0.1:0", Type: resolver.GRPCLB, ServerName: "grpclb.server"}} 1407 r.InitialState(resolver.State{Addresses: resolvedAddrs}) 1408 te.resolverScheme = r.Scheme() 1409 te.clientConn(grpc.WithResolvers(r)) 1410 defer te.tearDown() 1411 var nestedConn int64 1412 if err := verifyResultWithDelay(func() (bool, error) { 1413 tcs, _ := channelz.GetTopChannels(0, 0) 1414 if len(tcs) != 1 { 1415 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1416 } 1417 if len(tcs[0].NestedChans) != 1 { 1418 return false, fmt.Errorf("there should be one nested channel from grpclb, not %d", len(tcs[0].NestedChans)) 1419 } 1420 for k := range tcs[0].NestedChans { 1421 nestedConn = k 1422 } 1423 for _, e := range tcs[0].Trace.Events { 1424 if e.RefID == nestedConn && e.RefType != channelz.RefChannel { 1425 return false, fmt.Errorf("nested channel trace event shoud have RefChannel as RefType") 1426 } 1427 } 1428 ncm := channelz.GetChannel(nestedConn) 1429 if ncm.Trace == nil { 1430 return false, fmt.Errorf("trace for nested channel should not be empty") 1431 } 1432 if len(ncm.Trace.Events) == 0 { 1433 return false, fmt.Errorf("there should be at least one trace event for nested channel not 0") 1434 } 1435 if ncm.Trace.Events[0].Desc != "Channel Created" { 1436 return false, fmt.Errorf("the first trace event should be \"Channel Created\", not %q", ncm.Trace.Events[0].Desc) 1437 } 1438 return true, nil 1439 }); err != nil { 1440 t.Fatal(err) 1441 } 1442 1443 r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "127.0.0.1:0"}}, ServiceConfig: parseCfg(r, `{"loadBalancingPolicy": "round_robin"}`)}) 1444 1445 // wait for the shutdown of grpclb balancer 1446 if err := verifyResultWithDelay(func() (bool, error) { 1447 tcs, _ := channelz.GetTopChannels(0, 0) 1448 if len(tcs) != 1 { 1449 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1450 } 1451 if len(tcs[0].NestedChans) != 0 { 1452 return false, fmt.Errorf("there should be 0 nested channel from grpclb, not %d", len(tcs[0].NestedChans)) 1453 } 1454 ncm := channelz.GetChannel(nestedConn) 1455 if ncm == nil { 1456 return false, fmt.Errorf("nested channel should still exist due to parent's trace reference") 1457 } 1458 if ncm.Trace == nil { 1459 return false, fmt.Errorf("trace for nested channel should not be empty") 1460 } 1461 if len(ncm.Trace.Events) == 0 { 1462 return false, fmt.Errorf("there should be at least one trace event for nested channel not 0") 1463 } 1464 if ncm.Trace.Events[len(ncm.Trace.Events)-1].Desc != "Channel Deleted" { 1465 return false, fmt.Errorf("the first trace event should be \"Channel Deleted\", not %q", ncm.Trace.Events[0].Desc) 1466 } 1467 return true, nil 1468 }); err != nil { 1469 t.Fatal(err) 1470 } 1471 } 1472 1473 func (s) TestCZSubChannelTraceCreationDeletion(t *testing.T) { 1474 czCleanup := channelz.NewChannelzStorage() 1475 defer czCleanupWrapper(czCleanup, t) 1476 e := tcpClearRREnv 1477 te := newTest(t, e) 1478 te.startServer(&testServer{security: e.security}) 1479 r := manual.NewBuilderWithScheme("whatever") 1480 r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: te.srvAddr}}}) 1481 te.resolverScheme = r.Scheme() 1482 te.clientConn(grpc.WithResolvers(r)) 1483 defer te.tearDown() 1484 var subConn int64 1485 // Here, we just wait for all sockets to be up. 
In the future, if we implement 1486 // IDLE, we may need to make several rpc calls to create the sockets. 1487 if err := verifyResultWithDelay(func() (bool, error) { 1488 tcs, _ := channelz.GetTopChannels(0, 0) 1489 if len(tcs) != 1 { 1490 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1491 } 1492 if len(tcs[0].SubChans) != 1 { 1493 return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) 1494 } 1495 for k := range tcs[0].SubChans { 1496 subConn = k 1497 } 1498 for _, e := range tcs[0].Trace.Events { 1499 if e.RefID == subConn && e.RefType != channelz.RefSubChannel { 1500 return false, fmt.Errorf("subchannel trace event should have RefSubChannel as RefType") 1501 } 1502 } 1503 scm := channelz.GetSubChannel(subConn) 1504 if scm == nil { 1505 return false, fmt.Errorf("subChannel does not exist") 1506 } 1507 if scm.Trace == nil { 1508 return false, fmt.Errorf("trace for subChannel should not be empty") 1509 } 1510 if len(scm.Trace.Events) == 0 { 1511 return false, fmt.Errorf("there should be at least one trace event for subChannel not 0") 1512 } 1513 if scm.Trace.Events[0].Desc != "Subchannel Created" { 1514 return false, fmt.Errorf("the first trace event should be \"Subchannel Created\", not %q", scm.Trace.Events[0].Desc) 1515 } 1516 return true, nil 1517 }); err != nil { 1518 t.Fatal(err) 1519 } 1520 1521 // Wait for ready 1522 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 1523 defer cancel() 1524 for src := te.cc.GetState(); src != connectivity.Ready; src = te.cc.GetState() { 1525 if !te.cc.WaitForStateChange(ctx, src) { 1526 t.Fatalf("timed out waiting for state change. got %v; want %v", src, connectivity.Ready) 1527 } 1528 } 1529 r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "fake address"}}}) 1530 // Wait for not-ready. 1531 for src := te.cc.GetState(); src == connectivity.Ready; src = te.cc.GetState() { 1532 if !te.cc.WaitForStateChange(ctx, src) { 1533 t.Fatalf("timed out waiting for state change. 
got %v; want !%v", src, connectivity.Ready) 1534 } 1535 } 1536 1537 if err := verifyResultWithDelay(func() (bool, error) { 1538 tcs, _ := channelz.GetTopChannels(0, 0) 1539 if len(tcs) != 1 { 1540 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1541 } 1542 if len(tcs[0].SubChans) != 1 { 1543 return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) 1544 } 1545 scm := channelz.GetSubChannel(subConn) 1546 if scm == nil { 1547 return false, fmt.Errorf("subChannel should still exist due to parent's trace reference") 1548 } 1549 if scm.Trace == nil { 1550 return false, fmt.Errorf("trace for SubChannel should not be empty") 1551 } 1552 if len(scm.Trace.Events) == 0 { 1553 return false, fmt.Errorf("there should be at least one trace event for subChannel not 0") 1554 } 1555 if got, want := scm.Trace.Events[len(scm.Trace.Events)-1].Desc, "Subchannel Deleted"; got != want { 1556 return false, fmt.Errorf("the last trace event should be %q, not %q", want, got) 1557 } 1558 1559 return true, nil 1560 }); err != nil { 1561 t.Fatal(err) 1562 } 1563 } 1564 1565 func (s) TestCZChannelAddressResolutionChange(t *testing.T) { 1566 czCleanup := channelz.NewChannelzStorage() 1567 defer czCleanupWrapper(czCleanup, t) 1568 e := tcpClearRREnv 1569 e.balancer = "" 1570 te := newTest(t, e) 1571 te.startServer(&testServer{security: e.security}) 1572 r := manual.NewBuilderWithScheme("whatever") 1573 addrs := []resolver.Address{{Addr: te.srvAddr}} 1574 r.InitialState(resolver.State{Addresses: addrs}) 1575 te.resolverScheme = r.Scheme() 1576 te.clientConn(grpc.WithResolvers(r)) 1577 defer te.tearDown() 1578 var cid int64 1579 // Here, we just wait for all sockets to be up. In the future, if we implement 1580 // IDLE, we may need to make several rpc calls to create the sockets. 1581 if err := verifyResultWithDelay(func() (bool, error) { 1582 tcs, _ := channelz.GetTopChannels(0, 0) 1583 if len(tcs) != 1 { 1584 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1585 } 1586 cid = tcs[0].ID 1587 for i := len(tcs[0].Trace.Events) - 1; i >= 0; i-- { 1588 if strings.Contains(tcs[0].Trace.Events[i].Desc, "resolver returned new addresses") { 1589 break 1590 } 1591 if i == 0 { 1592 return false, fmt.Errorf("events do not contain expected address resolution from empty address state. 
Got: %+v", tcs[0].Trace.Events) 1593 } 1594 } 1595 return true, nil 1596 }); err != nil { 1597 t.Fatal(err) 1598 } 1599 r.UpdateState(resolver.State{Addresses: addrs, ServiceConfig: parseCfg(r, `{"loadBalancingPolicy": "round_robin"}`)}) 1600 1601 if err := verifyResultWithDelay(func() (bool, error) { 1602 cm := channelz.GetChannel(cid) 1603 for i := len(cm.Trace.Events) - 1; i >= 0; i-- { 1604 if cm.Trace.Events[i].Desc == fmt.Sprintf("Channel switches to new LB policy %q", roundrobin.Name) { 1605 break 1606 } 1607 if i == 0 { 1608 return false, fmt.Errorf("events do not contain expected address resolution change of LB policy") 1609 } 1610 } 1611 return true, nil 1612 }); err != nil { 1613 t.Fatal(err) 1614 } 1615 1616 newSC := parseCfg(r, `{ 1617 "methodConfig": [ 1618 { 1619 "name": [ 1620 { 1621 "service": "grpc.testing.TestService", 1622 "method": "EmptyCall" 1623 } 1624 ], 1625 "waitForReady": false, 1626 "timeout": ".001s" 1627 } 1628 ] 1629 }`) 1630 r.UpdateState(resolver.State{Addresses: addrs, ServiceConfig: newSC}) 1631 1632 if err := verifyResultWithDelay(func() (bool, error) { 1633 cm := channelz.GetChannel(cid) 1634 1635 var es []string 1636 for i := len(cm.Trace.Events) - 1; i >= 0; i-- { 1637 if strings.Contains(cm.Trace.Events[i].Desc, "service config updated") { 1638 break 1639 } 1640 es = append(es, cm.Trace.Events[i].Desc) 1641 if i == 0 { 1642 return false, fmt.Errorf("events do not contain expected address resolution of new service config\n Events:\n%v", strings.Join(es, "\n")) 1643 } 1644 } 1645 return true, nil 1646 }); err != nil { 1647 t.Fatal(err) 1648 } 1649 1650 r.UpdateState(resolver.State{Addresses: []resolver.Address{}, ServiceConfig: newSC}) 1651 1652 if err := verifyResultWithDelay(func() (bool, error) { 1653 cm := channelz.GetChannel(cid) 1654 for i := len(cm.Trace.Events) - 1; i >= 0; i-- { 1655 if strings.Contains(cm.Trace.Events[i].Desc, "resolver returned an empty address list") { 1656 break 1657 } 1658 if i == 0 { 1659 return false, fmt.Errorf("events do not contain expected address resolution of empty address") 1660 } 1661 } 1662 return true, nil 1663 }); err != nil { 1664 t.Fatal(err) 1665 } 1666 } 1667 1668 func (s) TestCZSubChannelPickedNewAddress(t *testing.T) { 1669 czCleanup := channelz.NewChannelzStorage() 1670 defer czCleanupWrapper(czCleanup, t) 1671 e := tcpClearRREnv 1672 e.balancer = "" 1673 te := newTest(t, e) 1674 te.startServers(&testServer{security: e.security}, 3) 1675 r := manual.NewBuilderWithScheme("whatever") 1676 var svrAddrs []resolver.Address 1677 for _, a := range te.srvAddrs { 1678 svrAddrs = append(svrAddrs, resolver.Address{Addr: a}) 1679 } 1680 r.InitialState(resolver.State{Addresses: svrAddrs}) 1681 te.resolverScheme = r.Scheme() 1682 cc := te.clientConn(grpc.WithResolvers(r)) 1683 defer te.tearDown() 1684 tc := testpb.NewTestServiceClient(cc) 1685 // make sure the connection is up 1686 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 1687 defer cancel() 1688 if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil { 1689 t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err) 1690 } 1691 te.srvs[0].Stop() 1692 te.srvs[1].Stop() 1693 // Here, we just wait for all sockets to be up. Make several rpc calls to 1694 // create the sockets since we do not automatically reconnect. 
1695 done := make(chan struct{}) 1696 defer close(done) 1697 go func() { 1698 for { 1699 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 1700 tc.EmptyCall(ctx, &testpb.Empty{}) 1701 cancel() 1702 select { 1703 case <-time.After(10 * time.Millisecond): 1704 case <-done: 1705 return 1706 } 1707 } 1708 }() 1709 if err := verifyResultWithDelay(func() (bool, error) { 1710 tcs, _ := channelz.GetTopChannels(0, 0) 1711 if len(tcs) != 1 { 1712 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1713 } 1714 if len(tcs[0].SubChans) != 1 { 1715 return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) 1716 } 1717 var subConn int64 1718 for k := range tcs[0].SubChans { 1719 subConn = k 1720 } 1721 scm := channelz.GetSubChannel(subConn) 1722 if scm.Trace == nil { 1723 return false, fmt.Errorf("trace for SubChannel should not be empty") 1724 } 1725 if len(scm.Trace.Events) == 0 { 1726 return false, fmt.Errorf("there should be at least one trace event for subChannel not 0") 1727 } 1728 for i := len(scm.Trace.Events) - 1; i >= 0; i-- { 1729 if scm.Trace.Events[i].Desc == fmt.Sprintf("Subchannel picks a new address %q to connect", te.srvAddrs[2]) { 1730 break 1731 } 1732 if i == 0 { 1733 return false, fmt.Errorf("events do not contain expected address resolution of subchannel picked new address") 1734 } 1735 } 1736 return true, nil 1737 }); err != nil { 1738 t.Fatal(err) 1739 } 1740 } 1741 1742 func (s) TestCZSubChannelConnectivityState(t *testing.T) { 1743 czCleanup := channelz.NewChannelzStorage() 1744 defer czCleanupWrapper(czCleanup, t) 1745 e := tcpClearRREnv 1746 te := newTest(t, e) 1747 te.startServer(&testServer{security: e.security}) 1748 r := manual.NewBuilderWithScheme("whatever") 1749 r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: te.srvAddr}}}) 1750 te.resolverScheme = r.Scheme() 1751 cc := te.clientConn(grpc.WithResolvers(r)) 1752 defer te.tearDown() 1753 tc := testpb.NewTestServiceClient(cc) 1754 // make sure the connection is up 1755 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 1756 defer cancel() 1757 if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil { 1758 t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err) 1759 } 1760 var subConn int64 1761 te.srv.Stop() 1762 1763 if err := verifyResultWithDelay(func() (bool, error) { 1764 // we need to obtain the SubChannel id before it gets deleted from Channel's children list (due 1765 // to effect of r.UpdateState(resolver.State{Addresses:[]resolver.Address{}})) 1766 if subConn == 0 { 1767 tcs, _ := channelz.GetTopChannels(0, 0) 1768 if len(tcs) != 1 { 1769 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1770 } 1771 if len(tcs[0].SubChans) != 1 { 1772 return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) 1773 } 1774 for k := range tcs[0].SubChans { 1775 // get the SubChannel id for further trace inquiry. 
1776 subConn = k 1777 } 1778 } 1779 scm := channelz.GetSubChannel(subConn) 1780 if scm == nil { 1781 return false, fmt.Errorf("subChannel should still exist due to parent's trace reference") 1782 } 1783 if scm.Trace == nil { 1784 return false, fmt.Errorf("trace for SubChannel should not be empty") 1785 } 1786 if len(scm.Trace.Events) == 0 { 1787 return false, fmt.Errorf("there should be at least one trace event for subChannel not 0") 1788 } 1789 var ready, connecting, transient, shutdown int 1790 for _, e := range scm.Trace.Events { 1791 if e.Desc == fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.TransientFailure) { 1792 transient++ 1793 } 1794 } 1795 // Make sure the SubChannel has already seen transient failure before shutting it down through 1796 // r.UpdateState(resolver.State{Addresses:[]resolver.Address{}}). 1797 if transient == 0 { 1798 return false, fmt.Errorf("transient failure has not happened on SubChannel yet") 1799 } 1800 transient = 0 1801 r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "fake address"}}}) 1802 for _, e := range scm.Trace.Events { 1803 if e.Desc == fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.Ready) { 1804 ready++ 1805 } 1806 if e.Desc == fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.Connecting) { 1807 connecting++ 1808 } 1809 if e.Desc == fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.TransientFailure) { 1810 transient++ 1811 } 1812 if e.Desc == fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.Shutdown) { 1813 shutdown++ 1814 } 1815 } 1816 // example: 1817 // Subchannel Created 1818 // Subchannel's connectivity state changed to CONNECTING 1819 // Subchannel picked a new address: "localhost:36011" 1820 // Subchannel's connectivity state changed to READY 1821 // Subchannel's connectivity state changed to TRANSIENT_FAILURE 1822 // Subchannel's connectivity state changed to CONNECTING 1823 // Subchannel picked a new address: "localhost:36011" 1824 // Subchannel's connectivity state changed to SHUTDOWN 1825 // Subchannel Deleted 1826 if ready != 1 || connecting < 1 || transient < 1 || shutdown != 1 { 1827 return false, fmt.Errorf("got: ready = %d, connecting = %d, transient = %d, shutdown = %d, want: 1, >=1, >=1, 1", ready, connecting, transient, shutdown) 1828 } 1829 1830 return true, nil 1831 }); err != nil { 1832 t.Fatal(err) 1833 } 1834 } 1835 1836 func (s) TestCZChannelConnectivityState(t *testing.T) { 1837 czCleanup := channelz.NewChannelzStorage() 1838 defer czCleanupWrapper(czCleanup, t) 1839 e := tcpClearRREnv 1840 te := newTest(t, e) 1841 te.startServer(&testServer{security: e.security}) 1842 r := manual.NewBuilderWithScheme("whatever") 1843 r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: te.srvAddr}}}) 1844 te.resolverScheme = r.Scheme() 1845 cc := te.clientConn(grpc.WithResolvers(r)) 1846 defer te.tearDown() 1847 tc := testpb.NewTestServiceClient(cc) 1848 // make sure the connection is up 1849 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 1850 defer cancel() 1851 if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil { 1852 t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err) 1853 } 1854 te.srv.Stop() 1855 if err := verifyResultWithDelay(func() (bool, error) { 1856 tcs, _ := channelz.GetTopChannels(0, 0) 1857 if len(tcs) != 1 { 1858 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1859 } 1860 1861 var ready, connecting, transient int 1862 for _, e := 
range tcs[0].Trace.Events { 1863 if e.Desc == fmt.Sprintf("Channel Connectivity change to %v", connectivity.Ready) { 1864 ready++ 1865 } 1866 if e.Desc == fmt.Sprintf("Channel Connectivity change to %v", connectivity.Connecting) { 1867 connecting++ 1868 } 1869 if e.Desc == fmt.Sprintf("Channel Connectivity change to %v", connectivity.TransientFailure) { 1870 transient++ 1871 } 1872 } 1873 1874 // example: 1875 // Channel Created 1876 // Addresses resolved (from empty address state): "localhost:40467" 1877 // SubChannel (id: 4[]) Created 1878 // Channel's connectivity state changed to CONNECTING 1879 // Channel's connectivity state changed to READY 1880 // Channel's connectivity state changed to TRANSIENT_FAILURE 1881 // Channel's connectivity state changed to CONNECTING 1882 // Channel's connectivity state changed to TRANSIENT_FAILURE 1883 if ready != 1 || connecting < 1 || transient < 1 { 1884 return false, fmt.Errorf("got: ready = %d, connecting = %d, transient = %d, want: 1, >=1, >=1", ready, connecting, transient) 1885 } 1886 return true, nil 1887 }); err != nil { 1888 t.Fatal(err) 1889 } 1890 } 1891 1892 func (s) TestCZTraceOverwriteChannelDeletion(t *testing.T) { 1893 czCleanup := channelz.NewChannelzStorage() 1894 defer czCleanupWrapper(czCleanup, t) 1895 e := tcpClearRREnv 1896 // avoid newTest using WithBalancerName, which would override service 1897 // config's change of balancer below. 1898 e.balancer = "" 1899 te := newTest(t, e) 1900 channelz.SetMaxTraceEntry(1) 1901 defer channelz.ResetMaxTraceEntryToDefault() 1902 r := manual.NewBuilderWithScheme("whatever") 1903 resolvedAddrs := []resolver.Address{{Addr: "127.0.0.1:0", Type: resolver.GRPCLB, ServerName: "grpclb.server"}} 1904 r.InitialState(resolver.State{Addresses: resolvedAddrs}) 1905 te.resolverScheme = r.Scheme() 1906 te.clientConn(grpc.WithResolvers(r)) 1907 defer te.tearDown() 1908 var nestedConn int64 1909 if err := verifyResultWithDelay(func() (bool, error) { 1910 tcs, _ := channelz.GetTopChannels(0, 0) 1911 if len(tcs) != 1 { 1912 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1913 } 1914 if len(tcs[0].NestedChans) != 1 { 1915 return false, fmt.Errorf("there should be one nested channel from grpclb, not %d", len(tcs[0].NestedChans)) 1916 } 1917 for k := range tcs[0].NestedChans { 1918 nestedConn = k 1919 } 1920 return true, nil 1921 }); err != nil { 1922 t.Fatal(err) 1923 } 1924 1925 r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "127.0.0.1:0"}}, ServiceConfig: parseCfg(r, `{"loadBalancingPolicy": "round_robin"}`)}) 1926 1927 // wait for the shutdown of grpclb balancer 1928 if err := verifyResultWithDelay(func() (bool, error) { 1929 tcs, _ := channelz.GetTopChannels(0, 0) 1930 if len(tcs) != 1 { 1931 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1932 } 1933 if len(tcs[0].NestedChans) != 0 { 1934 return false, fmt.Errorf("there should be 0 nested channel from grpclb, not %d", len(tcs[0].NestedChans)) 1935 } 1936 return true, nil 1937 }); err != nil { 1938 t.Fatal(err) 1939 } 1940 1941 // If the nested channel deletion is the last trace event before the next validation, that validation will fail, as the top channel will still hold a reference to it. 1942 // This line forces a trace event on the top channel in that case. 
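// With SetMaxTraceEntry(1) set above, this new event evicts the earlier trace entry that still referenced the deleted nested channel, so channelz can finally remove that nested channel.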
1943 r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "127.0.0.1:0"}}, ServiceConfig: parseCfg(r, `{"loadBalancingPolicy": "round_robin"}`)}) 1944 1945 // verify that the nested channel no longer exists because the trace event referencing it was overwritten. 1946 if err := verifyResultWithDelay(func() (bool, error) { 1947 cm := channelz.GetChannel(nestedConn) 1948 if cm != nil { 1949 return false, fmt.Errorf("nested channel should have been deleted since its parent's trace should not contain any reference to it anymore") 1950 } 1951 return true, nil 1952 }); err != nil { 1953 t.Fatal(err) 1954 } 1955 } 1956 1957 func (s) TestCZTraceOverwriteSubChannelDeletion(t *testing.T) { 1958 czCleanup := channelz.NewChannelzStorage() 1959 defer czCleanupWrapper(czCleanup, t) 1960 e := tcpClearRREnv 1961 te := newTest(t, e) 1962 channelz.SetMaxTraceEntry(1) 1963 defer channelz.ResetMaxTraceEntryToDefault() 1964 te.startServer(&testServer{security: e.security}) 1965 r := manual.NewBuilderWithScheme("whatever") 1966 r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: te.srvAddr}}}) 1967 te.resolverScheme = r.Scheme() 1968 te.clientConn(grpc.WithResolvers(r)) 1969 defer te.tearDown() 1970 var subConn int64 1971 // Here, we just wait for all sockets to be up. In the future, if we implement 1972 // IDLE, we may need to make several rpc calls to create the sockets. 1973 if err := verifyResultWithDelay(func() (bool, error) { 1974 tcs, _ := channelz.GetTopChannels(0, 0) 1975 if len(tcs) != 1 { 1976 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 1977 } 1978 if len(tcs[0].SubChans) != 1 { 1979 return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) 1980 } 1981 for k := range tcs[0].SubChans { 1982 subConn = k 1983 } 1984 return true, nil 1985 }); err != nil { 1986 t.Fatal(err) 1987 } 1988 1989 // Wait for ready 1990 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 1991 defer cancel() 1992 for src := te.cc.GetState(); src != connectivity.Ready; src = te.cc.GetState() { 1993 if !te.cc.WaitForStateChange(ctx, src) { 1994 t.Fatalf("timed out waiting for state change. got %v; want %v", src, connectivity.Ready) 1995 } 1996 } 1997 r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "fake address"}}}) 1998 // Wait for not-ready. 1999 for src := te.cc.GetState(); src == connectivity.Ready; src = te.cc.GetState() { 2000 if !te.cc.WaitForStateChange(ctx, src) { 2001 t.Fatalf("timed out waiting for state change. got %v; want !%v", src, connectivity.Ready) 2002 } 2003 } 2004 2005 // verify that the subchannel no longer exists because the trace event referencing it was overwritten. 
2006 if err := verifyResultWithDelay(func() (bool, error) { 2007 cm := channelz.GetSubChannel(subConn) 2008 if cm != nil { 2009 return false, fmt.Errorf("subchannel should have been deleted since its parent's trace should not contain any reference to it anymore") 2010 } 2011 return true, nil 2012 }); err != nil { 2013 t.Fatal(err) 2014 } 2015 } 2016 2017 func (s) TestCZTraceTopChannelDeletionTraceClear(t *testing.T) { 2018 czCleanup := channelz.NewChannelzStorage() 2019 defer czCleanupWrapper(czCleanup, t) 2020 e := tcpClearRREnv 2021 te := newTest(t, e) 2022 te.startServer(&testServer{security: e.security}) 2023 r := manual.NewBuilderWithScheme("whatever") 2024 r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: te.srvAddr}}}) 2025 te.resolverScheme = r.Scheme() 2026 te.clientConn(grpc.WithResolvers(r)) 2027 var subConn int64 2028 // Here, we just wait for all sockets to be up. In the future, if we implement 2029 // IDLE, we may need to make several rpc calls to create the sockets. 2030 if err := verifyResultWithDelay(func() (bool, error) { 2031 tcs, _ := channelz.GetTopChannels(0, 0) 2032 if len(tcs) != 1 { 2033 return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) 2034 } 2035 if len(tcs[0].SubChans) != 1 { 2036 return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) 2037 } 2038 for k := range tcs[0].SubChans { 2039 subConn = k 2040 } 2041 return true, nil 2042 }); err != nil { 2043 t.Fatal(err) 2044 } 2045 te.tearDown() 2046 // verify that the subchannel no longer exists because its parent channel was deleted and its trace cleared. 2047 if err := verifyResultWithDelay(func() (bool, error) { 2048 cm := channelz.GetSubChannel(subConn) 2049 if cm != nil { 2050 return false, fmt.Errorf("subchannel should have been deleted since its parent's trace should not contain any reference to it anymore") 2051 } 2052 return true, nil 2053 }); err != nil { 2054 t.Fatal(err) 2055 } 2056 }