google.golang.org/grpc@v1.62.1/test/channelz_test.go (about)

     1  /*
     2   *
     3   * Copyright 2018 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package test
    20  
    21  import (
    22  	"context"
    23  	"crypto/tls"
    24  	"fmt"
    25  	"net"
    26  	"regexp"
    27  	"strings"
    28  	"sync"
    29  	"testing"
    30  	"time"
    31  
    32  	"github.com/google/go-cmp/cmp"
    33  	"golang.org/x/net/http2"
    34  	"google.golang.org/grpc"
    35  	_ "google.golang.org/grpc/balancer/grpclb"
    36  	grpclbstate "google.golang.org/grpc/balancer/grpclb/state"
    37  	"google.golang.org/grpc/balancer/roundrobin"
    38  	"google.golang.org/grpc/codes"
    39  	"google.golang.org/grpc/connectivity"
    40  	"google.golang.org/grpc/credentials"
    41  	"google.golang.org/grpc/internal"
    42  	"google.golang.org/grpc/internal/channelz"
    43  	"google.golang.org/grpc/internal/stubserver"
    44  	"google.golang.org/grpc/internal/testutils"
    45  	"google.golang.org/grpc/keepalive"
    46  	"google.golang.org/grpc/resolver"
    47  	"google.golang.org/grpc/resolver/manual"
    48  	"google.golang.org/grpc/status"
    49  	"google.golang.org/grpc/testdata"
    50  
    51  	testgrpc "google.golang.org/grpc/interop/grpc_testing"
    52  	testpb "google.golang.org/grpc/interop/grpc_testing"
    53  )
    54  
    55  func verifyResultWithDelay(f func() (bool, error)) error {
    56  	var ok bool
    57  	var err error
    58  	for i := 0; i < 1000; i++ {
    59  		if ok, err = f(); ok {
    60  			return nil
    61  		}
    62  		time.Sleep(10 * time.Millisecond)
    63  	}
    64  	return err
    65  }
    66  
    67  func (s) TestCZServerRegistrationAndDeletion(t *testing.T) {
    68  	testcases := []struct {
    69  		total  int
    70  		start  int64
    71  		max    int64
    72  		length int64
    73  		end    bool
    74  	}{
    75  		{total: int(channelz.EntryPerPage), start: 0, max: 0, length: channelz.EntryPerPage, end: true},
    76  		{total: int(channelz.EntryPerPage) - 1, start: 0, max: 0, length: channelz.EntryPerPage - 1, end: true},
    77  		{total: int(channelz.EntryPerPage) + 1, start: 0, max: 0, length: channelz.EntryPerPage, end: false},
    78  		{total: int(channelz.EntryPerPage) + 1, start: int64(2*(channelz.EntryPerPage+1) + 1), max: 0, length: 0, end: true},
    79  		{total: int(channelz.EntryPerPage), start: 0, max: 1, length: 1, end: false},
    80  		{total: int(channelz.EntryPerPage), start: 0, max: channelz.EntryPerPage - 1, length: channelz.EntryPerPage - 1, end: false},
    81  	}
    82  
    83  	for i, c := range testcases {
    84  		// Reset channelz IDs so `start` is valid.
    85  		channelz.IDGen.Reset()
    86  
    87  		e := tcpClearRREnv
    88  		te := newTest(t, e)
    89  		te.startServers(&testServer{security: e.security}, c.total)
    90  
    91  		ss, end := channelz.GetServers(c.start, c.max)
    92  		if int64(len(ss)) != c.length || end != c.end {
    93  			t.Fatalf("%d: GetServers(%d) = %+v (len of which: %d), end: %+v, want len(GetServers(%d)) = %d, end: %+v", i, c.start, ss, len(ss), end, c.start, c.length, c.end)
    94  		}
    95  		te.tearDown()
    96  		ss, end = channelz.GetServers(c.start, c.max)
    97  		if len(ss) != 0 || !end {
    98  			t.Fatalf("%d: GetServers(0) = %+v (len of which: %d), end: %+v, want len(GetServers(0)) = 0, end: true", i, ss, len(ss), end)
    99  		}
   100  	}
   101  }
   102  
   103  func (s) TestCZGetServer(t *testing.T) {
   104  	e := tcpClearRREnv
   105  	te := newTest(t, e)
   106  	te.startServer(&testServer{security: e.security})
   107  	defer te.tearDown()
   108  
   109  	ss, _ := channelz.GetServers(0, 0)
   110  	if len(ss) != 1 {
   111  		t.Fatalf("there should only be one server, not %d", len(ss))
   112  	}
   113  
   114  	serverID := ss[0].ID
   115  	srv := channelz.GetServer(serverID)
   116  	if srv == nil {
   117  		t.Fatalf("server %d does not exist", serverID)
   118  	}
   119  	if srv.ID != serverID {
   120  		t.Fatalf("server want id %d, but got %d", serverID, srv.ID)
   121  	}
   122  
   123  	te.tearDown()
   124  
   125  	if err := verifyResultWithDelay(func() (bool, error) {
   126  		srv := channelz.GetServer(serverID)
   127  		if srv != nil {
   128  			return false, fmt.Errorf("server %d should not exist", serverID)
   129  		}
   130  
   131  		return true, nil
   132  	}); err != nil {
   133  		t.Fatal(err)
   134  	}
   135  }
   136  
   137  func (s) TestCZTopChannelRegistrationAndDeletion(t *testing.T) {
   138  	testcases := []struct {
   139  		total  int
   140  		start  int64
   141  		max    int64
   142  		length int64
   143  		end    bool
   144  	}{
   145  		{total: int(channelz.EntryPerPage), start: 0, max: 0, length: channelz.EntryPerPage, end: true},
   146  		{total: int(channelz.EntryPerPage) - 1, start: 0, max: 0, length: channelz.EntryPerPage - 1, end: true},
   147  		{total: int(channelz.EntryPerPage) + 1, start: 0, max: 0, length: channelz.EntryPerPage, end: false},
   148  		{total: int(channelz.EntryPerPage) + 1, start: int64(2*(channelz.EntryPerPage+1) + 1), max: 0, length: 0, end: true},
   149  		{total: int(channelz.EntryPerPage), start: 0, max: 1, length: 1, end: false},
   150  		{total: int(channelz.EntryPerPage), start: 0, max: channelz.EntryPerPage - 1, length: channelz.EntryPerPage - 1, end: false},
   151  	}
   152  
   153  	for _, c := range testcases {
   154  		// Reset channelz IDs so `start` is valid.
   155  		channelz.IDGen.Reset()
   156  
   157  		e := tcpClearRREnv
   158  		te := newTest(t, e)
   159  		var ccs []*grpc.ClientConn
   160  		for i := 0; i < c.total; i++ {
   161  			cc := te.clientConn()
   162  			te.cc = nil
   163  			// avoid making next dial blocking
   164  			te.srvAddr = ""
   165  			ccs = append(ccs, cc)
   166  		}
   167  		if err := verifyResultWithDelay(func() (bool, error) {
   168  			if tcs, end := channelz.GetTopChannels(c.start, c.max); int64(len(tcs)) != c.length || end != c.end {
   169  				return false, fmt.Errorf("getTopChannels(%d) = %+v (len of which: %d), end: %+v, want len(GetTopChannels(%d)) = %d, end: %+v", c.start, tcs, len(tcs), end, c.start, c.length, c.end)
   170  			}
   171  			return true, nil
   172  		}); err != nil {
   173  			t.Fatal(err)
   174  		}
   175  
   176  		for _, cc := range ccs {
   177  			cc.Close()
   178  		}
   179  
   180  		if err := verifyResultWithDelay(func() (bool, error) {
   181  			if tcs, end := channelz.GetTopChannels(c.start, c.max); len(tcs) != 0 || !end {
   182  				return false, fmt.Errorf("getTopChannels(0) = %+v (len of which: %d), end: %+v, want len(GetTopChannels(0)) = 0, end: true", tcs, len(tcs), end)
   183  			}
   184  			return true, nil
   185  		}); err != nil {
   186  			t.Fatal(err)
   187  		}
   188  		te.tearDown()
   189  	}
   190  }
   191  
   192  func (s) TestCZTopChannelRegistrationAndDeletionWhenDialFail(t *testing.T) {
   193  	// Make dial fails (due to no transport security specified)
   194  	_, err := grpc.Dial("fake.addr")
   195  	if err == nil {
   196  		t.Fatal("expecting dial to fail")
   197  	}
   198  	if tcs, end := channelz.GetTopChannels(0, 0); tcs != nil || !end {
   199  		t.Fatalf("GetTopChannels(0, 0) = %v, %v, want <nil>, true", tcs, end)
   200  	}
   201  }
   202  
   203  func (s) TestCZNestedChannelRegistrationAndDeletion(t *testing.T) {
   204  	e := tcpClearRREnv
   205  	// avoid calling API to set balancer type, which will void service config's change of balancer.
   206  	e.balancer = ""
   207  	te := newTest(t, e)
   208  	r := manual.NewBuilderWithScheme("whatever")
   209  	te.resolverScheme = r.Scheme()
   210  	te.clientConn(grpc.WithResolvers(r))
   211  	resolvedAddrs := []resolver.Address{{Addr: "127.0.0.1:0", ServerName: "grpclb.server"}}
   212  	grpclbConfig := parseServiceConfig(t, r, `{"loadBalancingPolicy": "grpclb"}`)
   213  	r.UpdateState(grpclbstate.Set(resolver.State{ServiceConfig: grpclbConfig}, &grpclbstate.State{BalancerAddresses: resolvedAddrs}))
   214  	defer te.tearDown()
   215  
   216  	if err := verifyResultWithDelay(func() (bool, error) {
   217  		tcs, _ := channelz.GetTopChannels(0, 0)
   218  		if len(tcs) != 1 {
   219  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
   220  		}
   221  		if len(tcs[0].NestedChans) != 1 {
   222  			return false, fmt.Errorf("there should be one nested channel from grpclb, not %d", len(tcs[0].NestedChans))
   223  		}
   224  		return true, nil
   225  	}); err != nil {
   226  		t.Fatal(err)
   227  	}
   228  
   229  	r.UpdateState(resolver.State{
   230  		Addresses:     []resolver.Address{{Addr: "127.0.0.1:0"}},
   231  		ServiceConfig: parseServiceConfig(t, r, `{"loadBalancingPolicy": "round_robin"}`),
   232  	})
   233  
   234  	// wait for the shutdown of grpclb balancer
   235  	if err := verifyResultWithDelay(func() (bool, error) {
   236  		tcs, _ := channelz.GetTopChannels(0, 0)
   237  		if len(tcs) != 1 {
   238  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
   239  		}
   240  		if len(tcs[0].NestedChans) != 0 {
   241  			return false, fmt.Errorf("there should be 0 nested channel from grpclb, not %d", len(tcs[0].NestedChans))
   242  		}
   243  		return true, nil
   244  	}); err != nil {
   245  		t.Fatal(err)
   246  	}
   247  }
   248  
   249  func (s) TestCZClientSubChannelSocketRegistrationAndDeletion(t *testing.T) {
   250  	e := tcpClearRREnv
   251  	num := 3 // number of backends
   252  	te := newTest(t, e)
   253  	var svrAddrs []resolver.Address
   254  	te.startServers(&testServer{security: e.security}, num)
   255  	r := manual.NewBuilderWithScheme("whatever")
   256  	for _, a := range te.srvAddrs {
   257  		svrAddrs = append(svrAddrs, resolver.Address{Addr: a})
   258  	}
   259  	r.InitialState(resolver.State{Addresses: svrAddrs})
   260  	te.resolverScheme = r.Scheme()
   261  	te.clientConn(grpc.WithResolvers(r))
   262  	defer te.tearDown()
   263  	// Here, we just wait for all sockets to be up. In the future, if we implement
   264  	// IDLE, we may need to make several rpc calls to create the sockets.
   265  	if err := verifyResultWithDelay(func() (bool, error) {
   266  		tcs, _ := channelz.GetTopChannels(0, 0)
   267  		if len(tcs) != 1 {
   268  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
   269  		}
   270  		if len(tcs[0].SubChans) != num {
   271  			return false, fmt.Errorf("there should be %d subchannel not %d", num, len(tcs[0].SubChans))
   272  		}
   273  		count := 0
   274  		for k := range tcs[0].SubChans {
   275  			sc := channelz.GetSubChannel(k)
   276  			if sc == nil {
   277  				return false, fmt.Errorf("got <nil> subchannel")
   278  			}
   279  			count += len(sc.Sockets)
   280  		}
   281  		if count != num {
   282  			return false, fmt.Errorf("there should be %d sockets not %d", num, count)
   283  		}
   284  
   285  		return true, nil
   286  	}); err != nil {
   287  		t.Fatal(err)
   288  	}
   289  
   290  	r.UpdateState(resolver.State{Addresses: svrAddrs[:len(svrAddrs)-1]})
   291  
   292  	if err := verifyResultWithDelay(func() (bool, error) {
   293  		tcs, _ := channelz.GetTopChannels(0, 0)
   294  		if len(tcs) != 1 {
   295  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
   296  		}
   297  		if len(tcs[0].SubChans) != num-1 {
   298  			return false, fmt.Errorf("there should be %d subchannel not %d", num-1, len(tcs[0].SubChans))
   299  		}
   300  		count := 0
   301  		for k := range tcs[0].SubChans {
   302  			sc := channelz.GetSubChannel(k)
   303  			if sc == nil {
   304  				return false, fmt.Errorf("got <nil> subchannel")
   305  			}
   306  			count += len(sc.Sockets)
   307  		}
   308  		if count != num-1 {
   309  			return false, fmt.Errorf("there should be %d sockets not %d", num-1, count)
   310  		}
   311  
   312  		return true, nil
   313  	}); err != nil {
   314  		t.Fatal(err)
   315  	}
   316  }
   317  
   318  func (s) TestCZServerSocketRegistrationAndDeletion(t *testing.T) {
   319  	testcases := []struct {
   320  		total  int
   321  		start  int64
   322  		max    int64
   323  		length int64
   324  		end    bool
   325  	}{
   326  		{total: int(channelz.EntryPerPage), start: 0, max: 0, length: channelz.EntryPerPage, end: true},
   327  		{total: int(channelz.EntryPerPage) - 1, start: 0, max: 0, length: channelz.EntryPerPage - 1, end: true},
   328  		{total: int(channelz.EntryPerPage) + 1, start: 0, max: 0, length: channelz.EntryPerPage, end: false},
   329  		{total: int(channelz.EntryPerPage), start: 1, max: 0, length: channelz.EntryPerPage - 1, end: true},
   330  		{total: int(channelz.EntryPerPage) + 1, start: channelz.EntryPerPage + 1, max: 0, length: 0, end: true},
   331  		{total: int(channelz.EntryPerPage), start: 0, max: 1, length: 1, end: false},
   332  		{total: int(channelz.EntryPerPage), start: 0, max: channelz.EntryPerPage - 1, length: channelz.EntryPerPage - 1, end: false},
   333  	}
   334  
   335  	for _, c := range testcases {
   336  		// Reset channelz IDs so `start` is valid.
   337  		channelz.IDGen.Reset()
   338  
   339  		e := tcpClearRREnv
   340  		te := newTest(t, e)
   341  		te.startServer(&testServer{security: e.security})
   342  		var ccs []*grpc.ClientConn
   343  		for i := 0; i < c.total; i++ {
   344  			cc := te.clientConn()
   345  			te.cc = nil
   346  			ccs = append(ccs, cc)
   347  		}
   348  
   349  		var svrID int64
   350  		if err := verifyResultWithDelay(func() (bool, error) {
   351  			ss, _ := channelz.GetServers(0, 0)
   352  			if len(ss) != 1 {
   353  				return false, fmt.Errorf("there should only be one server, not %d", len(ss))
   354  			}
   355  			if len(ss[0].ListenSockets) != 1 {
   356  				return false, fmt.Errorf("there should only be one server listen socket, not %d", len(ss[0].ListenSockets))
   357  			}
   358  
   359  			startID := c.start
   360  			if startID != 0 {
   361  				ns, _ := channelz.GetServerSockets(ss[0].ID, 0, int64(c.total))
   362  				if int64(len(ns)) < c.start {
   363  					return false, fmt.Errorf("there should more than %d sockets, not %d", len(ns), c.start)
   364  				}
   365  				startID = ns[c.start-1].ID + 1
   366  			}
   367  
   368  			ns, end := channelz.GetServerSockets(ss[0].ID, startID, c.max)
   369  			if int64(len(ns)) != c.length || end != c.end {
   370  				return false, fmt.Errorf("GetServerSockets(%d) = %+v (len of which: %d), end: %+v, want len(GetServerSockets(%d)) = %d, end: %+v", c.start, ns, len(ns), end, c.start, c.length, c.end)
   371  			}
   372  
   373  			svrID = ss[0].ID
   374  			return true, nil
   375  		}); err != nil {
   376  			t.Fatal(err)
   377  		}
   378  
   379  		for _, cc := range ccs {
   380  			cc.Close()
   381  		}
   382  
   383  		if err := verifyResultWithDelay(func() (bool, error) {
   384  			ns, _ := channelz.GetServerSockets(svrID, c.start, c.max)
   385  			if len(ns) != 0 {
   386  				return false, fmt.Errorf("there should be %d normal sockets not %d", 0, len(ns))
   387  			}
   388  			return true, nil
   389  		}); err != nil {
   390  			t.Fatal(err)
   391  		}
   392  		te.tearDown()
   393  	}
   394  }
   395  
   396  func (s) TestCZServerListenSocketDeletion(t *testing.T) {
   397  	s := grpc.NewServer()
   398  	lis, err := net.Listen("tcp", "localhost:0")
   399  	if err != nil {
   400  		t.Fatalf("failed to listen: %v", err)
   401  	}
   402  	go s.Serve(lis)
   403  	if err := verifyResultWithDelay(func() (bool, error) {
   404  		ss, _ := channelz.GetServers(0, 0)
   405  		if len(ss) != 1 {
   406  			return false, fmt.Errorf("there should only be one server, not %d", len(ss))
   407  		}
   408  		if len(ss[0].ListenSockets) != 1 {
   409  			return false, fmt.Errorf("there should only be one server listen socket, not %d", len(ss[0].ListenSockets))
   410  		}
   411  		return true, nil
   412  	}); err != nil {
   413  		t.Fatal(err)
   414  	}
   415  
   416  	lis.Close()
   417  	if err := verifyResultWithDelay(func() (bool, error) {
   418  		ss, _ := channelz.GetServers(0, 0)
   419  		if len(ss) != 1 {
   420  			return false, fmt.Errorf("there should be 1 server, not %d", len(ss))
   421  		}
   422  		if len(ss[0].ListenSockets) != 0 {
   423  			return false, fmt.Errorf("there should only be %d server listen socket, not %d", 0, len(ss[0].ListenSockets))
   424  		}
   425  		return true, nil
   426  	}); err != nil {
   427  		t.Fatal(err)
   428  	}
   429  	s.Stop()
   430  }
   431  
   432  type dummyChannel struct{}
   433  
   434  func (d *dummyChannel) ChannelzMetric() *channelz.ChannelInternalMetric {
   435  	return &channelz.ChannelInternalMetric{}
   436  }
   437  
   438  type dummySocket struct{}
   439  
   440  func (d *dummySocket) ChannelzMetric() *channelz.SocketInternalMetric {
   441  	return &channelz.SocketInternalMetric{}
   442  }
   443  
   444  func (s) TestCZRecusivelyDeletionOfEntry(t *testing.T) {
   445  	//           +--+TopChan+---+
   446  	//           |              |
   447  	//           v              v
   448  	//    +-+SubChan1+--+   SubChan2
   449  	//    |             |
   450  	//    v             v
   451  	// Socket1       Socket2
   452  
   453  	topChanID := channelz.RegisterChannel(&dummyChannel{}, nil, "")
   454  	subChanID1, _ := channelz.RegisterSubChannel(&dummyChannel{}, topChanID, "")
   455  	subChanID2, _ := channelz.RegisterSubChannel(&dummyChannel{}, topChanID, "")
   456  	sktID1, _ := channelz.RegisterNormalSocket(&dummySocket{}, subChanID1, "")
   457  	sktID2, _ := channelz.RegisterNormalSocket(&dummySocket{}, subChanID1, "")
   458  
   459  	tcs, _ := channelz.GetTopChannels(0, 0)
   460  	if tcs == nil || len(tcs) != 1 {
   461  		t.Fatalf("There should be one TopChannel entry")
   462  	}
   463  	if len(tcs[0].SubChans) != 2 {
   464  		t.Fatalf("There should be two SubChannel entries")
   465  	}
   466  	sc := channelz.GetSubChannel(subChanID1.Int())
   467  	if sc == nil || len(sc.Sockets) != 2 {
   468  		t.Fatalf("There should be two Socket entries")
   469  	}
   470  
   471  	channelz.RemoveEntry(topChanID)
   472  	tcs, _ = channelz.GetTopChannels(0, 0)
   473  	if tcs == nil || len(tcs) != 1 {
   474  		t.Fatalf("There should be one TopChannel entry")
   475  	}
   476  
   477  	channelz.RemoveEntry(subChanID1)
   478  	channelz.RemoveEntry(subChanID2)
   479  	tcs, _ = channelz.GetTopChannels(0, 0)
   480  	if tcs == nil || len(tcs) != 1 {
   481  		t.Fatalf("There should be one TopChannel entry")
   482  	}
   483  	if len(tcs[0].SubChans) != 1 {
   484  		t.Fatalf("There should be one SubChannel entry")
   485  	}
   486  
   487  	channelz.RemoveEntry(sktID1)
   488  	channelz.RemoveEntry(sktID2)
   489  	tcs, _ = channelz.GetTopChannels(0, 0)
   490  	if tcs != nil {
   491  		t.Fatalf("There should be no TopChannel entry")
   492  	}
   493  }
   494  
   495  func (s) TestCZChannelMetrics(t *testing.T) {
   496  	e := tcpClearRREnv
   497  	num := 3 // number of backends
   498  	te := newTest(t, e)
   499  	te.maxClientSendMsgSize = newInt(8)
   500  	var svrAddrs []resolver.Address
   501  	te.startServers(&testServer{security: e.security}, num)
   502  	r := manual.NewBuilderWithScheme("whatever")
   503  	for _, a := range te.srvAddrs {
   504  		svrAddrs = append(svrAddrs, resolver.Address{Addr: a})
   505  	}
   506  	r.InitialState(resolver.State{Addresses: svrAddrs})
   507  	te.resolverScheme = r.Scheme()
   508  	cc := te.clientConn(grpc.WithResolvers(r))
   509  	defer te.tearDown()
   510  	tc := testgrpc.NewTestServiceClient(cc)
   511  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   512  	defer cancel()
   513  	if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil {
   514  		t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
   515  	}
   516  
   517  	const smallSize = 1
   518  	const largeSize = 8
   519  
   520  	largePayload, err := newPayload(testpb.PayloadType_COMPRESSABLE, largeSize)
   521  	if err != nil {
   522  		t.Fatal(err)
   523  	}
   524  	req := &testpb.SimpleRequest{
   525  		ResponseType: testpb.PayloadType_COMPRESSABLE,
   526  		ResponseSize: int32(smallSize),
   527  		Payload:      largePayload,
   528  	}
   529  
   530  	if _, err := tc.UnaryCall(ctx, req); err == nil || status.Code(err) != codes.ResourceExhausted {
   531  		t.Fatalf("TestService/UnaryCall(_, _) = _, %v, want _, error code: %s", err, codes.ResourceExhausted)
   532  	}
   533  
   534  	stream, err := tc.FullDuplexCall(ctx)
   535  	if err != nil {
   536  		t.Fatalf("%v.FullDuplexCall(_) = _, %v, want <nil>", tc, err)
   537  	}
   538  	defer stream.CloseSend()
   539  	// Here, we just wait for all sockets to be up. In the future, if we implement
   540  	// IDLE, we may need to make several rpc calls to create the sockets.
   541  	if err := verifyResultWithDelay(func() (bool, error) {
   542  		tcs, _ := channelz.GetTopChannels(0, 0)
   543  		if len(tcs) != 1 {
   544  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
   545  		}
   546  		if len(tcs[0].SubChans) != num {
   547  			return false, fmt.Errorf("there should be %d subchannel not %d", num, len(tcs[0].SubChans))
   548  		}
   549  		var cst, csu, cf int64
   550  		for k := range tcs[0].SubChans {
   551  			sc := channelz.GetSubChannel(k)
   552  			if sc == nil {
   553  				return false, fmt.Errorf("got <nil> subchannel")
   554  			}
   555  			cst += sc.ChannelData.CallsStarted
   556  			csu += sc.ChannelData.CallsSucceeded
   557  			cf += sc.ChannelData.CallsFailed
   558  		}
   559  		if cst != 3 {
   560  			return false, fmt.Errorf("there should be 3 CallsStarted not %d", cst)
   561  		}
   562  		if csu != 1 {
   563  			return false, fmt.Errorf("there should be 1 CallsSucceeded not %d", csu)
   564  		}
   565  		if cf != 1 {
   566  			return false, fmt.Errorf("there should be 1 CallsFailed not %d", cf)
   567  		}
   568  		if tcs[0].ChannelData.CallsStarted != 3 {
   569  			return false, fmt.Errorf("there should be 3 CallsStarted not %d", tcs[0].ChannelData.CallsStarted)
   570  		}
   571  		if tcs[0].ChannelData.CallsSucceeded != 1 {
   572  			return false, fmt.Errorf("there should be 1 CallsSucceeded not %d", tcs[0].ChannelData.CallsSucceeded)
   573  		}
   574  		if tcs[0].ChannelData.CallsFailed != 1 {
   575  			return false, fmt.Errorf("there should be 1 CallsFailed not %d", tcs[0].ChannelData.CallsFailed)
   576  		}
   577  		return true, nil
   578  	}); err != nil {
   579  		t.Fatal(err)
   580  	}
   581  }
   582  
   583  func (s) TestCZServerMetrics(t *testing.T) {
   584  	e := tcpClearRREnv
   585  	te := newTest(t, e)
   586  	te.maxServerReceiveMsgSize = newInt(8)
   587  	te.startServer(&testServer{security: e.security})
   588  	defer te.tearDown()
   589  	cc := te.clientConn()
   590  	tc := testgrpc.NewTestServiceClient(cc)
   591  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   592  	defer cancel()
   593  	if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil {
   594  		t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
   595  	}
   596  
   597  	const smallSize = 1
   598  	const largeSize = 8
   599  
   600  	largePayload, err := newPayload(testpb.PayloadType_COMPRESSABLE, largeSize)
   601  	if err != nil {
   602  		t.Fatal(err)
   603  	}
   604  	req := &testpb.SimpleRequest{
   605  		ResponseType: testpb.PayloadType_COMPRESSABLE,
   606  		ResponseSize: int32(smallSize),
   607  		Payload:      largePayload,
   608  	}
   609  	if _, err := tc.UnaryCall(ctx, req); err == nil || status.Code(err) != codes.ResourceExhausted {
   610  		t.Fatalf("TestService/UnaryCall(_, _) = _, %v, want _, error code: %s", err, codes.ResourceExhausted)
   611  	}
   612  
   613  	stream, err := tc.FullDuplexCall(ctx)
   614  	if err != nil {
   615  		t.Fatalf("%v.FullDuplexCall(_) = _, %v, want <nil>", tc, err)
   616  	}
   617  	defer stream.CloseSend()
   618  
   619  	if err := verifyResultWithDelay(func() (bool, error) {
   620  		ss, _ := channelz.GetServers(0, 0)
   621  		if len(ss) != 1 {
   622  			return false, fmt.Errorf("there should only be one server, not %d", len(ss))
   623  		}
   624  		if ss[0].ServerData.CallsStarted != 3 {
   625  			return false, fmt.Errorf("there should be 3 CallsStarted not %d", ss[0].ServerData.CallsStarted)
   626  		}
   627  		if ss[0].ServerData.CallsSucceeded != 1 {
   628  			return false, fmt.Errorf("there should be 1 CallsSucceeded not %d", ss[0].ServerData.CallsSucceeded)
   629  		}
   630  		if ss[0].ServerData.CallsFailed != 1 {
   631  			return false, fmt.Errorf("there should be 1 CallsFailed not %d", ss[0].ServerData.CallsFailed)
   632  		}
   633  		return true, nil
   634  	}); err != nil {
   635  		t.Fatal(err)
   636  	}
   637  }
   638  
   639  type testServiceClientWrapper struct {
   640  	testgrpc.TestServiceClient
   641  	mu             sync.RWMutex
   642  	streamsCreated int
   643  }
   644  
   645  func (t *testServiceClientWrapper) getCurrentStreamID() uint32 {
   646  	t.mu.RLock()
   647  	defer t.mu.RUnlock()
   648  	return uint32(2*t.streamsCreated - 1)
   649  }
   650  
   651  func (t *testServiceClientWrapper) EmptyCall(ctx context.Context, in *testpb.Empty, opts ...grpc.CallOption) (*testpb.Empty, error) {
   652  	t.mu.Lock()
   653  	defer t.mu.Unlock()
   654  	t.streamsCreated++
   655  	return t.TestServiceClient.EmptyCall(ctx, in, opts...)
   656  }
   657  
   658  func (t *testServiceClientWrapper) UnaryCall(ctx context.Context, in *testpb.SimpleRequest, opts ...grpc.CallOption) (*testpb.SimpleResponse, error) {
   659  	t.mu.Lock()
   660  	defer t.mu.Unlock()
   661  	t.streamsCreated++
   662  	return t.TestServiceClient.UnaryCall(ctx, in, opts...)
   663  }
   664  
   665  func (t *testServiceClientWrapper) StreamingOutputCall(ctx context.Context, in *testpb.StreamingOutputCallRequest, opts ...grpc.CallOption) (testgrpc.TestService_StreamingOutputCallClient, error) {
   666  	t.mu.Lock()
   667  	defer t.mu.Unlock()
   668  	t.streamsCreated++
   669  	return t.TestServiceClient.StreamingOutputCall(ctx, in, opts...)
   670  }
   671  
   672  func (t *testServiceClientWrapper) StreamingInputCall(ctx context.Context, opts ...grpc.CallOption) (testgrpc.TestService_StreamingInputCallClient, error) {
   673  	t.mu.Lock()
   674  	defer t.mu.Unlock()
   675  	t.streamsCreated++
   676  	return t.TestServiceClient.StreamingInputCall(ctx, opts...)
   677  }
   678  
   679  func (t *testServiceClientWrapper) FullDuplexCall(ctx context.Context, opts ...grpc.CallOption) (testgrpc.TestService_FullDuplexCallClient, error) {
   680  	t.mu.Lock()
   681  	defer t.mu.Unlock()
   682  	t.streamsCreated++
   683  	return t.TestServiceClient.FullDuplexCall(ctx, opts...)
   684  }
   685  
   686  func (t *testServiceClientWrapper) HalfDuplexCall(ctx context.Context, opts ...grpc.CallOption) (testgrpc.TestService_HalfDuplexCallClient, error) {
   687  	t.mu.Lock()
   688  	defer t.mu.Unlock()
   689  	t.streamsCreated++
   690  	return t.TestServiceClient.HalfDuplexCall(ctx, opts...)
   691  }
   692  
   693  func doSuccessfulUnaryCall(tc testgrpc.TestServiceClient, t *testing.T) {
   694  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   695  	defer cancel()
   696  	if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil {
   697  		t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
   698  	}
   699  }
   700  
   701  func doStreamingInputCallWithLargePayload(tc testgrpc.TestServiceClient, t *testing.T) {
   702  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   703  	defer cancel()
   704  	s, err := tc.StreamingInputCall(ctx)
   705  	if err != nil {
   706  		t.Fatalf("TestService/StreamingInputCall(_) = _, %v, want <nil>", err)
   707  	}
   708  	payload, err := newPayload(testpb.PayloadType_COMPRESSABLE, 10000)
   709  	if err != nil {
   710  		t.Fatal(err)
   711  	}
   712  	s.Send(&testpb.StreamingInputCallRequest{Payload: payload})
   713  }
   714  
   715  func doServerSideFailedUnaryCall(tc testgrpc.TestServiceClient, t *testing.T) {
   716  	const smallSize = 1
   717  	const largeSize = 2000
   718  
   719  	largePayload, err := newPayload(testpb.PayloadType_COMPRESSABLE, largeSize)
   720  	if err != nil {
   721  		t.Fatal(err)
   722  	}
   723  	req := &testpb.SimpleRequest{
   724  		ResponseType: testpb.PayloadType_COMPRESSABLE,
   725  		ResponseSize: int32(smallSize),
   726  		Payload:      largePayload,
   727  	}
   728  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   729  	defer cancel()
   730  	if _, err := tc.UnaryCall(ctx, req); err == nil || status.Code(err) != codes.ResourceExhausted {
   731  		t.Fatalf("TestService/UnaryCall(_, _) = _, %v, want _, error code: %s", err, codes.ResourceExhausted)
   732  	}
   733  }
   734  
   735  func doClientSideInitiatedFailedStream(tc testgrpc.TestServiceClient, t *testing.T) {
   736  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   737  	stream, err := tc.FullDuplexCall(ctx)
   738  	if err != nil {
   739  		t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err)
   740  	}
   741  
   742  	const smallSize = 1
   743  	smallPayload, err := newPayload(testpb.PayloadType_COMPRESSABLE, smallSize)
   744  	if err != nil {
   745  		t.Fatal(err)
   746  	}
   747  
   748  	sreq := &testpb.StreamingOutputCallRequest{
   749  		ResponseType: testpb.PayloadType_COMPRESSABLE,
   750  		ResponseParameters: []*testpb.ResponseParameters{
   751  			{Size: smallSize},
   752  		},
   753  		Payload: smallPayload,
   754  	}
   755  
   756  	if err := stream.Send(sreq); err != nil {
   757  		t.Fatalf("%v.Send(%v) = %v, want <nil>", stream, sreq, err)
   758  	}
   759  	if _, err := stream.Recv(); err != nil {
   760  		t.Fatalf("%v.Recv() = %v, want <nil>", stream, err)
   761  	}
   762  	// By canceling the call, the client will send rst_stream to end the call, and
   763  	// the stream will failed as a result.
   764  	cancel()
   765  }
   766  
   767  // This func is to be used to test client side counting of failed streams.
   768  func doServerSideInitiatedFailedStreamWithRSTStream(tc testgrpc.TestServiceClient, t *testing.T, l *listenerWrapper) {
   769  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   770  	defer cancel()
   771  	stream, err := tc.FullDuplexCall(ctx)
   772  	if err != nil {
   773  		t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err)
   774  	}
   775  
   776  	const smallSize = 1
   777  	smallPayload, err := newPayload(testpb.PayloadType_COMPRESSABLE, smallSize)
   778  	if err != nil {
   779  		t.Fatal(err)
   780  	}
   781  
   782  	sreq := &testpb.StreamingOutputCallRequest{
   783  		ResponseType: testpb.PayloadType_COMPRESSABLE,
   784  		ResponseParameters: []*testpb.ResponseParameters{
   785  			{Size: smallSize},
   786  		},
   787  		Payload: smallPayload,
   788  	}
   789  
   790  	if err := stream.Send(sreq); err != nil {
   791  		t.Fatalf("%v.Send(%v) = %v, want <nil>", stream, sreq, err)
   792  	}
   793  	if _, err := stream.Recv(); err != nil {
   794  		t.Fatalf("%v.Recv() = %v, want <nil>", stream, err)
   795  	}
   796  
   797  	rcw := l.getLastConn()
   798  
   799  	if rcw != nil {
   800  		rcw.writeRSTStream(tc.(*testServiceClientWrapper).getCurrentStreamID(), http2.ErrCodeCancel)
   801  	}
   802  	if _, err := stream.Recv(); err == nil {
   803  		t.Fatalf("%v.Recv() = %v, want <non-nil>", stream, err)
   804  	}
   805  }
   806  
   807  // this func is to be used to test client side counting of failed streams.
   808  func doServerSideInitiatedFailedStreamWithGoAway(ctx context.Context, tc testgrpc.TestServiceClient, t *testing.T, l *listenerWrapper) {
   809  	// This call is just to keep the transport from shutting down (socket will be deleted
   810  	// in this case, and we will not be able to get metrics).
   811  	s, err := tc.FullDuplexCall(ctx)
   812  	if err != nil {
   813  		t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err)
   814  	}
   815  	if err := s.Send(&testpb.StreamingOutputCallRequest{ResponseParameters: []*testpb.ResponseParameters{
   816  		{
   817  			Size: 1,
   818  		},
   819  	}}); err != nil {
   820  		t.Fatalf("s.Send() failed with error: %v", err)
   821  	}
   822  	if _, err := s.Recv(); err != nil {
   823  		t.Fatalf("s.Recv() failed with error: %v", err)
   824  	}
   825  
   826  	s, err = tc.FullDuplexCall(ctx)
   827  	if err != nil {
   828  		t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err)
   829  	}
   830  	if err := s.Send(&testpb.StreamingOutputCallRequest{ResponseParameters: []*testpb.ResponseParameters{
   831  		{
   832  			Size: 1,
   833  		},
   834  	}}); err != nil {
   835  		t.Fatalf("s.Send() failed with error: %v", err)
   836  	}
   837  	if _, err := s.Recv(); err != nil {
   838  		t.Fatalf("s.Recv() failed with error: %v", err)
   839  	}
   840  
   841  	rcw := l.getLastConn()
   842  	if rcw != nil {
   843  		rcw.writeGoAway(tc.(*testServiceClientWrapper).getCurrentStreamID()-2, http2.ErrCodeCancel, []byte{})
   844  	}
   845  	if _, err := s.Recv(); err == nil {
   846  		t.Fatalf("%v.Recv() = %v, want <non-nil>", s, err)
   847  	}
   848  }
   849  
   850  func (s) TestCZClientSocketMetricsStreamsAndMessagesCount(t *testing.T) {
   851  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   852  	defer cancel()
   853  
   854  	e := tcpClearRREnv
   855  	te := newTest(t, e)
   856  	te.maxServerReceiveMsgSize = newInt(20)
   857  	te.maxClientReceiveMsgSize = newInt(20)
   858  	rcw := te.startServerWithConnControl(&testServer{security: e.security})
   859  	defer te.tearDown()
   860  	cc := te.clientConn()
   861  	tc := &testServiceClientWrapper{TestServiceClient: testgrpc.NewTestServiceClient(cc)}
   862  
   863  	doSuccessfulUnaryCall(tc, t)
   864  	var scID, skID int64
   865  	if err := verifyResultWithDelay(func() (bool, error) {
   866  		tchan, _ := channelz.GetTopChannels(0, 0)
   867  		if len(tchan) != 1 {
   868  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan))
   869  		}
   870  		if len(tchan[0].SubChans) != 1 {
   871  			return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans))
   872  		}
   873  
   874  		for scID = range tchan[0].SubChans {
   875  			break
   876  		}
   877  		sc := channelz.GetSubChannel(scID)
   878  		if sc == nil {
   879  			return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", scID)
   880  		}
   881  		if len(sc.Sockets) != 1 {
   882  			return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets))
   883  		}
   884  		for skID = range sc.Sockets {
   885  			break
   886  		}
   887  		skt := channelz.GetSocket(skID)
   888  		sktData := skt.SocketData
   889  		if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 1 || sktData.MessagesSent != 1 || sktData.MessagesReceived != 1 {
   890  			return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, MessagesSent, MessagesReceived) = (1, 1, 1, 1), got (%d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.MessagesSent, sktData.MessagesReceived)
   891  		}
   892  		return true, nil
   893  	}); err != nil {
   894  		t.Fatal(err)
   895  	}
   896  
   897  	doServerSideFailedUnaryCall(tc, t)
   898  	if err := verifyResultWithDelay(func() (bool, error) {
   899  		skt := channelz.GetSocket(skID)
   900  		sktData := skt.SocketData
   901  		if sktData.StreamsStarted != 2 || sktData.StreamsSucceeded != 2 || sktData.MessagesSent != 2 || sktData.MessagesReceived != 1 {
   902  			return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, MessagesSent, MessagesReceived) = (2, 2, 2, 1), got (%d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.MessagesSent, sktData.MessagesReceived)
   903  		}
   904  		return true, nil
   905  	}); err != nil {
   906  		t.Fatal(err)
   907  	}
   908  
   909  	doClientSideInitiatedFailedStream(tc, t)
   910  	if err := verifyResultWithDelay(func() (bool, error) {
   911  		skt := channelz.GetSocket(skID)
   912  		sktData := skt.SocketData
   913  		if sktData.StreamsStarted != 3 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 1 || sktData.MessagesSent != 3 || sktData.MessagesReceived != 2 {
   914  			return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (3, 2, 1, 3, 2), got (%d, %d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived)
   915  		}
   916  		return true, nil
   917  	}); err != nil {
   918  		t.Fatal(err)
   919  	}
   920  
   921  	doServerSideInitiatedFailedStreamWithRSTStream(tc, t, rcw)
   922  	if err := verifyResultWithDelay(func() (bool, error) {
   923  		skt := channelz.GetSocket(skID)
   924  		sktData := skt.SocketData
   925  		if sktData.StreamsStarted != 4 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 2 || sktData.MessagesSent != 4 || sktData.MessagesReceived != 3 {
   926  			return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (4, 2, 2, 4, 3), got (%d, %d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived)
   927  		}
   928  		return true, nil
   929  	}); err != nil {
   930  		t.Fatal(err)
   931  	}
   932  
   933  	doServerSideInitiatedFailedStreamWithGoAway(ctx, tc, t, rcw)
   934  	if err := verifyResultWithDelay(func() (bool, error) {
   935  		skt := channelz.GetSocket(skID)
   936  		sktData := skt.SocketData
   937  		if sktData.StreamsStarted != 6 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 3 || sktData.MessagesSent != 6 || sktData.MessagesReceived != 5 {
   938  			return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (6, 2, 3, 6, 5), got (%d, %d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived)
   939  		}
   940  		return true, nil
   941  	}); err != nil {
   942  		t.Fatal(err)
   943  	}
   944  }
   945  
   946  // This test is to complete TestCZClientSocketMetricsStreamsAndMessagesCount and
   947  // TestCZServerSocketMetricsStreamsAndMessagesCount by adding the test case of
   948  // server sending RST_STREAM to client due to client side flow control violation.
   949  // It is separated from other cases due to setup incompatibly, i.e. max receive
   950  // size violation will mask flow control violation.
   951  func (s) TestCZClientAndServerSocketMetricsStreamsCountFlowControlRSTStream(t *testing.T) {
   952  	e := tcpClearRREnv
   953  	te := newTest(t, e)
   954  	te.serverInitialWindowSize = 65536
   955  	// Avoid overflowing connection level flow control window, which will lead to
   956  	// transport being closed.
   957  	te.serverInitialConnWindowSize = 65536 * 2
   958  	ts := &stubserver.StubServer{FullDuplexCallF: func(stream testgrpc.TestService_FullDuplexCallServer) error {
   959  		stream.Send(&testpb.StreamingOutputCallResponse{})
   960  		<-stream.Context().Done()
   961  		return status.Errorf(codes.DeadlineExceeded, "deadline exceeded or cancelled")
   962  	}}
   963  	te.startServer(ts)
   964  	defer te.tearDown()
   965  	cc, dw := te.clientConnWithConnControl()
   966  	tc := &testServiceClientWrapper{TestServiceClient: testgrpc.NewTestServiceClient(cc)}
   967  
   968  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   969  	stream, err := tc.FullDuplexCall(ctx)
   970  	if err != nil {
   971  		t.Fatalf("TestService/FullDuplexCall(_) = _, %v, want <nil>", err)
   972  	}
   973  	if _, err := stream.Recv(); err != nil {
   974  		t.Fatalf("stream.Recv() = %v, want nil", err)
   975  	}
   976  	go func() {
   977  		payload := make([]byte, 16384)
   978  		for i := 0; i < 6; i++ {
   979  			dw.getRawConnWrapper().writeRawFrame(http2.FrameData, 0, tc.getCurrentStreamID(), payload)
   980  		}
   981  	}()
   982  	if _, err := stream.Recv(); status.Code(err) != codes.ResourceExhausted {
   983  		t.Fatalf("stream.Recv() = %v, want error code: %v", err, codes.ResourceExhausted)
   984  	}
   985  	cancel()
   986  
   987  	if err := verifyResultWithDelay(func() (bool, error) {
   988  		tchan, _ := channelz.GetTopChannels(0, 0)
   989  		if len(tchan) != 1 {
   990  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan))
   991  		}
   992  		if len(tchan[0].SubChans) != 1 {
   993  			return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans))
   994  		}
   995  		var id int64
   996  		for id = range tchan[0].SubChans {
   997  			break
   998  		}
   999  		sc := channelz.GetSubChannel(id)
  1000  		if sc == nil {
  1001  			return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id)
  1002  		}
  1003  		if len(sc.Sockets) != 1 {
  1004  			return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets))
  1005  		}
  1006  		for id = range sc.Sockets {
  1007  			break
  1008  		}
  1009  		skt := channelz.GetSocket(id)
  1010  		sktData := skt.SocketData
  1011  		if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 0 || sktData.StreamsFailed != 1 {
  1012  			return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, StreamsFailed) = (1, 0, 1), got (%d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed)
  1013  		}
  1014  		ss, _ := channelz.GetServers(0, 0)
  1015  		if len(ss) != 1 {
  1016  			return false, fmt.Errorf("there should only be one server, not %d", len(ss))
  1017  		}
  1018  
  1019  		ns, _ := channelz.GetServerSockets(ss[0].ID, 0, 0)
  1020  		if len(ns) != 1 {
  1021  			return false, fmt.Errorf("there should be one server normal socket, not %d", len(ns))
  1022  		}
  1023  		sktData = ns[0].SocketData
  1024  		if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 0 || sktData.StreamsFailed != 1 {
  1025  			return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, StreamsFailed) = (1, 0, 1), got (%d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed)
  1026  		}
  1027  		return true, nil
  1028  	}); err != nil {
  1029  		t.Fatal(err)
  1030  	}
  1031  }
  1032  
  1033  func (s) TestCZClientAndServerSocketMetricsFlowControl(t *testing.T) {
  1034  	e := tcpClearRREnv
  1035  	te := newTest(t, e)
  1036  	// disable BDP
  1037  	te.serverInitialWindowSize = 65536
  1038  	te.serverInitialConnWindowSize = 65536
  1039  	te.clientInitialWindowSize = 65536
  1040  	te.clientInitialConnWindowSize = 65536
  1041  	te.startServer(&testServer{security: e.security})
  1042  	defer te.tearDown()
  1043  	cc := te.clientConn()
  1044  	tc := testgrpc.NewTestServiceClient(cc)
  1045  
  1046  	for i := 0; i < 10; i++ {
  1047  		doSuccessfulUnaryCall(tc, t)
  1048  	}
  1049  
  1050  	var cliSktID, svrSktID int64
  1051  	if err := verifyResultWithDelay(func() (bool, error) {
  1052  		tchan, _ := channelz.GetTopChannels(0, 0)
  1053  		if len(tchan) != 1 {
  1054  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan))
  1055  		}
  1056  		if len(tchan[0].SubChans) != 1 {
  1057  			return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans))
  1058  		}
  1059  		var id int64
  1060  		for id = range tchan[0].SubChans {
  1061  			break
  1062  		}
  1063  		sc := channelz.GetSubChannel(id)
  1064  		if sc == nil {
  1065  			return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id)
  1066  		}
  1067  		if len(sc.Sockets) != 1 {
  1068  			return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets))
  1069  		}
  1070  		for id = range sc.Sockets {
  1071  			break
  1072  		}
  1073  		skt := channelz.GetSocket(id)
  1074  		sktData := skt.SocketData
  1075  		// 65536 - 5 (Length-Prefixed-Message size) * 10 = 65486
  1076  		if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 65486 {
  1077  			return false, fmt.Errorf("client: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65536, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow)
  1078  		}
  1079  		ss, _ := channelz.GetServers(0, 0)
  1080  		if len(ss) != 1 {
  1081  			return false, fmt.Errorf("there should only be one server, not %d", len(ss))
  1082  		}
  1083  		ns, _ := channelz.GetServerSockets(ss[0].ID, 0, 0)
  1084  		sktData = ns[0].SocketData
  1085  		if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 65486 {
  1086  			return false, fmt.Errorf("server: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65536, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow)
  1087  		}
  1088  		cliSktID, svrSktID = id, ss[0].ID
  1089  		return true, nil
  1090  	}); err != nil {
  1091  		t.Fatal(err)
  1092  	}
  1093  
  1094  	doStreamingInputCallWithLargePayload(tc, t)
  1095  
  1096  	if err := verifyResultWithDelay(func() (bool, error) {
  1097  		skt := channelz.GetSocket(cliSktID)
  1098  		sktData := skt.SocketData
  1099  		// Local: 65536 - 5 (Length-Prefixed-Message size) * 10 = 65486
  1100  		// Remote: 65536 - 5 (Length-Prefixed-Message size) * 10 - 10011 = 55475
  1101  		if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 55475 {
  1102  			return false, fmt.Errorf("client: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65486, 55475), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow)
  1103  		}
  1104  		ss, _ := channelz.GetServers(0, 0)
  1105  		if len(ss) != 1 {
  1106  			return false, fmt.Errorf("there should only be one server, not %d", len(ss))
  1107  		}
  1108  		ns, _ := channelz.GetServerSockets(svrSktID, 0, 0)
  1109  		sktData = ns[0].SocketData
  1110  		if sktData.LocalFlowControlWindow != 55475 || sktData.RemoteFlowControlWindow != 65486 {
  1111  			return false, fmt.Errorf("server: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (55475, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow)
  1112  		}
  1113  		return true, nil
  1114  	}); err != nil {
  1115  		t.Fatal(err)
  1116  	}
  1117  
  1118  	// triggers transport flow control window update on server side, since unacked
  1119  	// bytes should be larger than limit now. i.e. 50 + 20022 > 65536/4.
  1120  	doStreamingInputCallWithLargePayload(tc, t)
  1121  	if err := verifyResultWithDelay(func() (bool, error) {
  1122  		skt := channelz.GetSocket(cliSktID)
  1123  		sktData := skt.SocketData
  1124  		// Local: 65536 - 5 (Length-Prefixed-Message size) * 10 = 65486
  1125  		// Remote: 65536
  1126  		if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 65536 {
  1127  			return false, fmt.Errorf("client: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65486, 65536), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow)
  1128  		}
  1129  		ss, _ := channelz.GetServers(0, 0)
  1130  		if len(ss) != 1 {
  1131  			return false, fmt.Errorf("there should only be one server, not %d", len(ss))
  1132  		}
  1133  		ns, _ := channelz.GetServerSockets(svrSktID, 0, 0)
  1134  		sktData = ns[0].SocketData
  1135  		if sktData.LocalFlowControlWindow != 65536 || sktData.RemoteFlowControlWindow != 65486 {
  1136  			return false, fmt.Errorf("server: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65536, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow)
  1137  		}
  1138  		return true, nil
  1139  	}); err != nil {
  1140  		t.Fatal(err)
  1141  	}
  1142  }
  1143  
  1144  func (s) TestCZClientSocketMetricsKeepAlive(t *testing.T) {
  1145  	const keepaliveRate = 50 * time.Millisecond
  1146  	defer func(t time.Duration) { internal.KeepaliveMinPingTime = t }(internal.KeepaliveMinPingTime)
  1147  	internal.KeepaliveMinPingTime = keepaliveRate
  1148  	e := tcpClearRREnv
  1149  	te := newTest(t, e)
  1150  	te.customDialOptions = append(te.customDialOptions, grpc.WithKeepaliveParams(
  1151  		keepalive.ClientParameters{
  1152  			Time:                keepaliveRate,
  1153  			Timeout:             500 * time.Millisecond,
  1154  			PermitWithoutStream: true,
  1155  		}))
  1156  	te.customServerOptions = append(te.customServerOptions, grpc.KeepaliveEnforcementPolicy(
  1157  		keepalive.EnforcementPolicy{
  1158  			MinTime:             keepaliveRate,
  1159  			PermitWithoutStream: true,
  1160  		}))
  1161  	te.startServer(&testServer{security: e.security})
  1162  	cc := te.clientConn() // Dial the server
  1163  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1164  	defer cancel()
  1165  	testutils.AwaitState(ctx, t, cc, connectivity.Ready)
  1166  	start := time.Now()
  1167  	// Wait for at least two keepalives to be able to occur.
  1168  	time.Sleep(2 * keepaliveRate)
  1169  	defer te.tearDown()
  1170  	if err := verifyResultWithDelay(func() (bool, error) {
  1171  		tchan, _ := channelz.GetTopChannels(0, 0)
  1172  		if len(tchan) != 1 {
  1173  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan))
  1174  		}
  1175  		if len(tchan[0].SubChans) != 1 {
  1176  			return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans))
  1177  		}
  1178  		var id int64
  1179  		for id = range tchan[0].SubChans {
  1180  			break
  1181  		}
  1182  		sc := channelz.GetSubChannel(id)
  1183  		if sc == nil {
  1184  			return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id)
  1185  		}
  1186  		if len(sc.Sockets) != 1 {
  1187  			return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets))
  1188  		}
  1189  		for id = range sc.Sockets {
  1190  			break
  1191  		}
  1192  		skt := channelz.GetSocket(id)
  1193  		want := int64(time.Since(start) / keepaliveRate)
  1194  		if skt.SocketData.KeepAlivesSent != want {
  1195  			return false, fmt.Errorf("there should be %v KeepAlives sent, not %d", want, skt.SocketData.KeepAlivesSent)
  1196  		}
  1197  		return true, nil
  1198  	}); err != nil {
  1199  		t.Fatal(err)
  1200  	}
  1201  }
  1202  
  1203  func (s) TestCZServerSocketMetricsStreamsAndMessagesCount(t *testing.T) {
  1204  	e := tcpClearRREnv
  1205  	te := newTest(t, e)
  1206  	te.maxServerReceiveMsgSize = newInt(20)
  1207  	te.maxClientReceiveMsgSize = newInt(20)
  1208  	te.startServer(&testServer{security: e.security})
  1209  	defer te.tearDown()
  1210  	cc, _ := te.clientConnWithConnControl()
  1211  	tc := &testServiceClientWrapper{TestServiceClient: testgrpc.NewTestServiceClient(cc)}
  1212  
  1213  	var svrID int64
  1214  	if err := verifyResultWithDelay(func() (bool, error) {
  1215  		ss, _ := channelz.GetServers(0, 0)
  1216  		if len(ss) != 1 {
  1217  			return false, fmt.Errorf("there should only be one server, not %d", len(ss))
  1218  		}
  1219  		svrID = ss[0].ID
  1220  		return true, nil
  1221  	}); err != nil {
  1222  		t.Fatal(err)
  1223  	}
  1224  
  1225  	doSuccessfulUnaryCall(tc, t)
  1226  	if err := verifyResultWithDelay(func() (bool, error) {
  1227  		ns, _ := channelz.GetServerSockets(svrID, 0, 0)
  1228  		sktData := ns[0].SocketData
  1229  		if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 1 || sktData.StreamsFailed != 0 || sktData.MessagesSent != 1 || sktData.MessagesReceived != 1 {
  1230  			return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, MessagesSent, MessagesReceived) = (1, 1, 1, 1), got (%d, %d, %d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived)
  1231  		}
  1232  		return true, nil
  1233  	}); err != nil {
  1234  		t.Fatal(err)
  1235  	}
  1236  
  1237  	doServerSideFailedUnaryCall(tc, t)
  1238  	if err := verifyResultWithDelay(func() (bool, error) {
  1239  		ns, _ := channelz.GetServerSockets(svrID, 0, 0)
  1240  		sktData := ns[0].SocketData
  1241  		if sktData.StreamsStarted != 2 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 0 || sktData.MessagesSent != 1 || sktData.MessagesReceived != 1 {
  1242  			return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (2, 2, 0, 1, 1), got (%d, %d, %d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived)
  1243  		}
  1244  		return true, nil
  1245  	}); err != nil {
  1246  		t.Fatal(err)
  1247  	}
  1248  
  1249  	doClientSideInitiatedFailedStream(tc, t)
  1250  	if err := verifyResultWithDelay(func() (bool, error) {
  1251  		ns, _ := channelz.GetServerSockets(svrID, 0, 0)
  1252  		sktData := ns[0].SocketData
  1253  		if sktData.StreamsStarted != 3 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 1 || sktData.MessagesSent != 2 || sktData.MessagesReceived != 2 {
  1254  			return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (3, 2, 1, 2, 2), got (%d, %d, %d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived)
  1255  		}
  1256  		return true, nil
  1257  	}); err != nil {
  1258  		t.Fatal(err)
  1259  	}
  1260  }
  1261  
  1262  func (s) TestCZServerSocketMetricsKeepAlive(t *testing.T) {
  1263  	defer func(t time.Duration) { internal.KeepaliveMinServerPingTime = t }(internal.KeepaliveMinServerPingTime)
  1264  	internal.KeepaliveMinServerPingTime = 50 * time.Millisecond
  1265  
  1266  	e := tcpClearRREnv
  1267  	te := newTest(t, e)
  1268  	// We setup the server keepalive parameters to send one keepalive every
  1269  	// 50ms, and verify that the actual number of keepalives is very close to
  1270  	// Time/50ms.  We had a bug wherein the server was sending one keepalive
  1271  	// every [Time+Timeout] instead of every [Time] period, and since Timeout
  1272  	// is configured to a high value here, we should be able to verify that the
  1273  	// fix works with the above mentioned logic.
  1274  	kpOption := grpc.KeepaliveParams(keepalive.ServerParameters{
  1275  		Time:    50 * time.Millisecond,
  1276  		Timeout: 5 * time.Second,
  1277  	})
  1278  	te.customServerOptions = append(te.customServerOptions, kpOption)
  1279  	te.startServer(&testServer{security: e.security})
  1280  	defer te.tearDown()
  1281  	cc := te.clientConn()
  1282  
  1283  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1284  	defer cancel()
  1285  	testutils.AwaitState(ctx, t, cc, connectivity.Ready)
  1286  
  1287  	// Allow about 5 pings to happen (250ms/50ms).
  1288  	time.Sleep(255 * time.Millisecond)
  1289  
  1290  	ss, _ := channelz.GetServers(0, 0)
  1291  	if len(ss) != 1 {
  1292  		t.Fatalf("there should be one server, not %d", len(ss))
  1293  	}
  1294  	ns, _ := channelz.GetServerSockets(ss[0].ID, 0, 0)
  1295  	if len(ns) != 1 {
  1296  		t.Fatalf("there should be one server normal socket, not %d", len(ns))
  1297  	}
  1298  	const wantMin, wantMax = 3, 7
  1299  	if got := ns[0].SocketData.KeepAlivesSent; got < wantMin || got > wantMax {
  1300  		t.Fatalf("got keepalivesCount: %v, want keepalivesCount: [%v,%v]", got, wantMin, wantMax)
  1301  	}
  1302  }
  1303  
  1304  var cipherSuites = []string{
  1305  	"TLS_RSA_WITH_RC4_128_SHA",
  1306  	"TLS_RSA_WITH_3DES_EDE_CBC_SHA",
  1307  	"TLS_RSA_WITH_AES_128_CBC_SHA",
  1308  	"TLS_RSA_WITH_AES_256_CBC_SHA",
  1309  	"TLS_RSA_WITH_AES_128_GCM_SHA256",
  1310  	"TLS_RSA_WITH_AES_256_GCM_SHA384",
  1311  	"TLS_ECDHE_ECDSA_WITH_RC4_128_SHA",
  1312  	"TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA",
  1313  	"TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA",
  1314  	"TLS_ECDHE_RSA_WITH_RC4_128_SHA",
  1315  	"TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA",
  1316  	"TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA",
  1317  	"TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA",
  1318  	"TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256",
  1319  	"TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",
  1320  	"TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384",
  1321  	"TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",
  1322  	"TLS_FALLBACK_SCSV",
  1323  	"TLS_RSA_WITH_AES_128_CBC_SHA256",
  1324  	"TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256",
  1325  	"TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256",
  1326  	"TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305",
  1327  	"TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305",
  1328  	"TLS_AES_128_GCM_SHA256",
  1329  	"TLS_AES_256_GCM_SHA384",
  1330  	"TLS_CHACHA20_POLY1305_SHA256",
  1331  }
  1332  
  1333  func (s) TestCZSocketGetSecurityValueTLS(t *testing.T) {
  1334  	e := tcpTLSRREnv
  1335  	te := newTest(t, e)
  1336  	te.startServer(&testServer{security: e.security})
  1337  	defer te.tearDown()
  1338  	te.clientConn()
  1339  	if err := verifyResultWithDelay(func() (bool, error) {
  1340  		tchan, _ := channelz.GetTopChannels(0, 0)
  1341  		if len(tchan) != 1 {
  1342  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan))
  1343  		}
  1344  		if len(tchan[0].SubChans) != 1 {
  1345  			return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans))
  1346  		}
  1347  		var id int64
  1348  		for id = range tchan[0].SubChans {
  1349  			break
  1350  		}
  1351  		sc := channelz.GetSubChannel(id)
  1352  		if sc == nil {
  1353  			return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id)
  1354  		}
  1355  		if len(sc.Sockets) != 1 {
  1356  			return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets))
  1357  		}
  1358  		for id = range sc.Sockets {
  1359  			break
  1360  		}
  1361  		skt := channelz.GetSocket(id)
  1362  		cert, _ := tls.LoadX509KeyPair(testdata.Path("x509/server1_cert.pem"), testdata.Path("x509/server1_key.pem"))
  1363  		securityVal, ok := skt.SocketData.Security.(*credentials.TLSChannelzSecurityValue)
  1364  		if !ok {
  1365  			return false, fmt.Errorf("the SocketData.Security is of type: %T, want: *credentials.TLSChannelzSecurityValue", skt.SocketData.Security)
  1366  		}
  1367  		if !cmp.Equal(securityVal.RemoteCertificate, cert.Certificate[0]) {
  1368  			return false, fmt.Errorf("SocketData.Security.RemoteCertificate got: %v, want: %v", securityVal.RemoteCertificate, cert.Certificate[0])
  1369  		}
  1370  		for _, v := range cipherSuites {
  1371  			if v == securityVal.StandardName {
  1372  				return true, nil
  1373  			}
  1374  		}
  1375  		return false, fmt.Errorf("SocketData.Security.StandardName got: %v, want it to be one of %v", securityVal.StandardName, cipherSuites)
  1376  	}); err != nil {
  1377  		t.Fatal(err)
  1378  	}
  1379  }
  1380  
  1381  func (s) TestCZChannelTraceCreationDeletion(t *testing.T) {
  1382  	e := tcpClearRREnv
  1383  	// avoid calling API to set balancer type, which will void service config's change of balancer.
  1384  	e.balancer = ""
  1385  	te := newTest(t, e)
  1386  	r := manual.NewBuilderWithScheme("whatever")
  1387  	te.resolverScheme = r.Scheme()
  1388  	te.clientConn(grpc.WithResolvers(r))
  1389  	resolvedAddrs := []resolver.Address{{Addr: "127.0.0.1:0", ServerName: "grpclb.server"}}
  1390  	grpclbConfig := parseServiceConfig(t, r, `{"loadBalancingPolicy": "grpclb"}`)
  1391  	r.UpdateState(grpclbstate.Set(resolver.State{ServiceConfig: grpclbConfig}, &grpclbstate.State{BalancerAddresses: resolvedAddrs}))
  1392  	defer te.tearDown()
  1393  
  1394  	var nestedConn int64
  1395  	if err := verifyResultWithDelay(func() (bool, error) {
  1396  		tcs, _ := channelz.GetTopChannels(0, 0)
  1397  		if len(tcs) != 1 {
  1398  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
  1399  		}
  1400  		if len(tcs[0].NestedChans) != 1 {
  1401  			return false, fmt.Errorf("there should be one nested channel from grpclb, not %d", len(tcs[0].NestedChans))
  1402  		}
  1403  		for k := range tcs[0].NestedChans {
  1404  			nestedConn = k
  1405  		}
  1406  		for _, e := range tcs[0].Trace.Events {
  1407  			if e.RefID == nestedConn && e.RefType != channelz.RefChannel {
  1408  				return false, fmt.Errorf("nested channel trace event shoud have RefChannel as RefType")
  1409  			}
  1410  		}
  1411  		ncm := channelz.GetChannel(nestedConn)
  1412  		if ncm.Trace == nil {
  1413  			return false, fmt.Errorf("trace for nested channel should not be empty")
  1414  		}
  1415  		if len(ncm.Trace.Events) == 0 {
  1416  			return false, fmt.Errorf("there should be at least one trace event for nested channel not 0")
  1417  		}
  1418  		pattern := `Channel created`
  1419  		if ok, _ := regexp.MatchString(pattern, ncm.Trace.Events[0].Desc); !ok {
  1420  			return false, fmt.Errorf("the first trace event should be %q, not %q", pattern, ncm.Trace.Events[0].Desc)
  1421  		}
  1422  		return true, nil
  1423  	}); err != nil {
  1424  		t.Fatal(err)
  1425  	}
  1426  
  1427  	r.UpdateState(resolver.State{
  1428  		Addresses:     []resolver.Address{{Addr: "127.0.0.1:0"}},
  1429  		ServiceConfig: parseServiceConfig(t, r, `{"loadBalancingPolicy": "round_robin"}`),
  1430  	})
  1431  
  1432  	// wait for the shutdown of grpclb balancer
  1433  	if err := verifyResultWithDelay(func() (bool, error) {
  1434  		tcs, _ := channelz.GetTopChannels(0, 0)
  1435  		if len(tcs) != 1 {
  1436  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
  1437  		}
  1438  		if len(tcs[0].NestedChans) != 0 {
  1439  			return false, fmt.Errorf("there should be 0 nested channel from grpclb, not %d", len(tcs[0].NestedChans))
  1440  		}
  1441  		ncm := channelz.GetChannel(nestedConn)
  1442  		if ncm == nil {
  1443  			return false, fmt.Errorf("nested channel should still exist due to parent's trace reference")
  1444  		}
  1445  		if ncm.Trace == nil {
  1446  			return false, fmt.Errorf("trace for nested channel should not be empty")
  1447  		}
  1448  		if len(ncm.Trace.Events) == 0 {
  1449  			return false, fmt.Errorf("there should be at least one trace event for nested channel not 0")
  1450  		}
  1451  		pattern := `Channel created`
  1452  		if ok, _ := regexp.MatchString(pattern, ncm.Trace.Events[0].Desc); !ok {
  1453  			return false, fmt.Errorf("the first trace event should be %q, not %q", pattern, ncm.Trace.Events[0].Desc)
  1454  		}
  1455  		return true, nil
  1456  	}); err != nil {
  1457  		t.Fatal(err)
  1458  	}
  1459  }
  1460  
  1461  func (s) TestCZSubChannelTraceCreationDeletion(t *testing.T) {
  1462  	e := tcpClearRREnv
  1463  	te := newTest(t, e)
  1464  	te.startServer(&testServer{security: e.security})
  1465  	r := manual.NewBuilderWithScheme("whatever")
  1466  	r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: te.srvAddr}}})
  1467  	te.resolverScheme = r.Scheme()
  1468  	te.clientConn(grpc.WithResolvers(r))
  1469  	defer te.tearDown()
  1470  	var subConn int64
  1471  	// Here, we just wait for all sockets to be up. In the future, if we implement
  1472  	// IDLE, we may need to make several rpc calls to create the sockets.
  1473  	if err := verifyResultWithDelay(func() (bool, error) {
  1474  		tcs, _ := channelz.GetTopChannels(0, 0)
  1475  		if len(tcs) != 1 {
  1476  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
  1477  		}
  1478  		if len(tcs[0].SubChans) != 1 {
  1479  			return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans))
  1480  		}
  1481  		for k := range tcs[0].SubChans {
  1482  			subConn = k
  1483  		}
  1484  		for _, e := range tcs[0].Trace.Events {
  1485  			if e.RefID == subConn && e.RefType != channelz.RefSubChannel {
  1486  				return false, fmt.Errorf("subchannel trace event shoud have RefType to be RefSubChannel")
  1487  			}
  1488  		}
  1489  		scm := channelz.GetSubChannel(subConn)
  1490  		if scm == nil {
  1491  			return false, fmt.Errorf("subChannel does not exist")
  1492  		}
  1493  		if scm.Trace == nil {
  1494  			return false, fmt.Errorf("trace for subChannel should not be empty")
  1495  		}
  1496  		if len(scm.Trace.Events) == 0 {
  1497  			return false, fmt.Errorf("there should be at least one trace event for subChannel not 0")
  1498  		}
  1499  		pattern := `Subchannel created`
  1500  		if ok, _ := regexp.MatchString(pattern, scm.Trace.Events[0].Desc); !ok {
  1501  			return false, fmt.Errorf("the first trace event should be %q, not %q", pattern, scm.Trace.Events[0].Desc)
  1502  		}
  1503  		return true, nil
  1504  	}); err != nil {
  1505  		t.Fatal(err)
  1506  	}
  1507  
  1508  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1509  	defer cancel()
  1510  	testutils.AwaitState(ctx, t, te.cc, connectivity.Ready)
  1511  	r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "fake address"}}})
  1512  	testutils.AwaitNotState(ctx, t, te.cc, connectivity.Ready)
  1513  
  1514  	if err := verifyResultWithDelay(func() (bool, error) {
  1515  		tcs, _ := channelz.GetTopChannels(0, 0)
  1516  		if len(tcs) != 1 {
  1517  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
  1518  		}
  1519  		if len(tcs[0].SubChans) != 1 {
  1520  			return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans))
  1521  		}
  1522  		scm := channelz.GetSubChannel(subConn)
  1523  		if scm == nil {
  1524  			return false, fmt.Errorf("subChannel should still exist due to parent's trace reference")
  1525  		}
  1526  		if scm.Trace == nil {
  1527  			return false, fmt.Errorf("trace for SubChannel should not be empty")
  1528  		}
  1529  		if len(scm.Trace.Events) == 0 {
  1530  			return false, fmt.Errorf("there should be at least one trace event for subChannel not 0")
  1531  		}
  1532  
  1533  		pattern := `Subchannel deleted`
  1534  		desc := scm.Trace.Events[len(scm.Trace.Events)-1].Desc
  1535  		if ok, _ := regexp.MatchString(pattern, desc); !ok {
  1536  			return false, fmt.Errorf("the last trace event should be %q, not %q", pattern, desc)
  1537  		}
  1538  		return true, nil
  1539  	}); err != nil {
  1540  		t.Fatal(err)
  1541  	}
  1542  }
  1543  
  1544  func (s) TestCZChannelAddressResolutionChange(t *testing.T) {
  1545  	e := tcpClearRREnv
  1546  	e.balancer = ""
  1547  	te := newTest(t, e)
  1548  	te.startServer(&testServer{security: e.security})
  1549  	r := manual.NewBuilderWithScheme("whatever")
  1550  	addrs := []resolver.Address{{Addr: te.srvAddr}}
  1551  	r.InitialState(resolver.State{Addresses: addrs})
  1552  	te.resolverScheme = r.Scheme()
  1553  	te.clientConn(grpc.WithResolvers(r))
  1554  	defer te.tearDown()
  1555  	var cid int64
  1556  	// Here, we just wait for all sockets to be up. In the future, if we implement
  1557  	// IDLE, we may need to make several rpc calls to create the sockets.
  1558  	if err := verifyResultWithDelay(func() (bool, error) {
  1559  		tcs, _ := channelz.GetTopChannels(0, 0)
  1560  		if len(tcs) != 1 {
  1561  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
  1562  		}
  1563  		cid = tcs[0].ID
  1564  		for i := len(tcs[0].Trace.Events) - 1; i >= 0; i-- {
  1565  			if strings.Contains(tcs[0].Trace.Events[i].Desc, "resolver returned new addresses") {
  1566  				break
  1567  			}
  1568  			if i == 0 {
  1569  				return false, fmt.Errorf("events do not contain expected address resolution from empty address state.  Got: %+v", tcs[0].Trace.Events)
  1570  			}
  1571  		}
  1572  		return true, nil
  1573  	}); err != nil {
  1574  		t.Fatal(err)
  1575  	}
  1576  	r.UpdateState(resolver.State{
  1577  		Addresses:     addrs,
  1578  		ServiceConfig: parseServiceConfig(t, r, `{"loadBalancingPolicy": "round_robin"}`),
  1579  	})
  1580  
  1581  	if err := verifyResultWithDelay(func() (bool, error) {
  1582  		cm := channelz.GetChannel(cid)
  1583  		for i := len(cm.Trace.Events) - 1; i >= 0; i-- {
  1584  			if strings.Contains(cm.Trace.Events[i].Desc, fmt.Sprintf("Channel switches to new LB policy %q", roundrobin.Name)) {
  1585  				break
  1586  			}
  1587  			if i == 0 {
  1588  				return false, fmt.Errorf("events do not contain expected address resolution change of LB policy")
  1589  			}
  1590  		}
  1591  		return true, nil
  1592  	}); err != nil {
  1593  		t.Fatal(err)
  1594  	}
  1595  
  1596  	newSC := parseServiceConfig(t, r, `{
  1597      "methodConfig": [
  1598          {
  1599              "name": [
  1600                  {
  1601                      "service": "grpc.testing.TestService",
  1602                      "method": "EmptyCall"
  1603                  }
  1604              ],
  1605              "waitForReady": false,
  1606              "timeout": ".001s"
  1607          }
  1608      ]
  1609  }`)
  1610  	r.UpdateState(resolver.State{Addresses: addrs, ServiceConfig: newSC})
  1611  
  1612  	if err := verifyResultWithDelay(func() (bool, error) {
  1613  		cm := channelz.GetChannel(cid)
  1614  
  1615  		var es []string
  1616  		for i := len(cm.Trace.Events) - 1; i >= 0; i-- {
  1617  			if strings.Contains(cm.Trace.Events[i].Desc, "service config updated") {
  1618  				break
  1619  			}
  1620  			es = append(es, cm.Trace.Events[i].Desc)
  1621  			if i == 0 {
  1622  				return false, fmt.Errorf("events do not contain expected address resolution of new service config\n Events:\n%v", strings.Join(es, "\n"))
  1623  			}
  1624  		}
  1625  		return true, nil
  1626  	}); err != nil {
  1627  		t.Fatal(err)
  1628  	}
  1629  
  1630  	r.UpdateState(resolver.State{Addresses: []resolver.Address{}, ServiceConfig: newSC})
  1631  
  1632  	if err := verifyResultWithDelay(func() (bool, error) {
  1633  		cm := channelz.GetChannel(cid)
  1634  		for i := len(cm.Trace.Events) - 1; i >= 0; i-- {
  1635  			if strings.Contains(cm.Trace.Events[i].Desc, "resolver returned an empty address list") {
  1636  				break
  1637  			}
  1638  			if i == 0 {
  1639  				return false, fmt.Errorf("events do not contain expected address resolution of empty address")
  1640  			}
  1641  		}
  1642  		return true, nil
  1643  	}); err != nil {
  1644  		t.Fatal(err)
  1645  	}
  1646  }
  1647  
  1648  func (s) TestCZSubChannelPickedNewAddress(t *testing.T) {
  1649  	e := tcpClearRREnv
  1650  	e.balancer = ""
  1651  	te := newTest(t, e)
  1652  	te.startServers(&testServer{security: e.security}, 3)
  1653  	r := manual.NewBuilderWithScheme("whatever")
  1654  	var svrAddrs []resolver.Address
  1655  	for _, a := range te.srvAddrs {
  1656  		svrAddrs = append(svrAddrs, resolver.Address{Addr: a})
  1657  	}
  1658  	r.InitialState(resolver.State{Addresses: svrAddrs})
  1659  	te.resolverScheme = r.Scheme()
  1660  	cc := te.clientConn(grpc.WithResolvers(r))
  1661  	defer te.tearDown()
  1662  	tc := testgrpc.NewTestServiceClient(cc)
  1663  	// make sure the connection is up
  1664  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1665  	defer cancel()
  1666  	if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil {
  1667  		t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
  1668  	}
  1669  	te.srvs[0].Stop()
  1670  	te.srvs[1].Stop()
  1671  	// Here, we just wait for all sockets to be up. Make several rpc calls to
  1672  	// create the sockets since we do not automatically reconnect.
  1673  	done := make(chan struct{})
  1674  	defer close(done)
  1675  	go func() {
  1676  		for {
  1677  			tc.EmptyCall(ctx, &testpb.Empty{})
  1678  			select {
  1679  			case <-time.After(10 * time.Millisecond):
  1680  			case <-done:
  1681  				return
  1682  			}
  1683  		}
  1684  	}()
  1685  	if err := verifyResultWithDelay(func() (bool, error) {
  1686  		tcs, _ := channelz.GetTopChannels(0, 0)
  1687  		if len(tcs) != 1 {
  1688  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
  1689  		}
  1690  		if len(tcs[0].SubChans) != 1 {
  1691  			return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans))
  1692  		}
  1693  		var subConn int64
  1694  		for k := range tcs[0].SubChans {
  1695  			subConn = k
  1696  		}
  1697  		scm := channelz.GetSubChannel(subConn)
  1698  		if scm.Trace == nil {
  1699  			return false, fmt.Errorf("trace for SubChannel should not be empty")
  1700  		}
  1701  		if len(scm.Trace.Events) == 0 {
  1702  			return false, fmt.Errorf("there should be at least one trace event for subChannel not 0")
  1703  		}
  1704  		for i := len(scm.Trace.Events) - 1; i >= 0; i-- {
  1705  			if strings.Contains(scm.Trace.Events[i].Desc, fmt.Sprintf("Subchannel picks a new address %q to connect", te.srvAddrs[2])) {
  1706  				break
  1707  			}
  1708  			if i == 0 {
  1709  				return false, fmt.Errorf("events do not contain expected address resolution of subchannel picked new address")
  1710  			}
  1711  		}
  1712  		return true, nil
  1713  	}); err != nil {
  1714  		t.Fatal(err)
  1715  	}
  1716  }
  1717  
  1718  func (s) TestCZSubChannelConnectivityState(t *testing.T) {
  1719  	e := tcpClearRREnv
  1720  	te := newTest(t, e)
  1721  	te.startServer(&testServer{security: e.security})
  1722  	r := manual.NewBuilderWithScheme("whatever")
  1723  	r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: te.srvAddr}}})
  1724  	te.resolverScheme = r.Scheme()
  1725  	cc := te.clientConn(grpc.WithResolvers(r))
  1726  	defer te.tearDown()
  1727  	tc := testgrpc.NewTestServiceClient(cc)
  1728  	// make sure the connection is up
  1729  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1730  	defer cancel()
  1731  	if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil {
  1732  		t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
  1733  	}
  1734  	te.srv.Stop()
  1735  
  1736  	var subConn int64
  1737  	if err := verifyResultWithDelay(func() (bool, error) {
  1738  		// we need to obtain the SubChannel id before it gets deleted from Channel's children list (due
  1739  		// to effect of r.UpdateState(resolver.State{Addresses:[]resolver.Address{}}))
  1740  		if subConn == 0 {
  1741  			tcs, _ := channelz.GetTopChannels(0, 0)
  1742  			if len(tcs) != 1 {
  1743  				return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
  1744  			}
  1745  			if len(tcs[0].SubChans) != 1 {
  1746  				return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans))
  1747  			}
  1748  			for k := range tcs[0].SubChans {
  1749  				// get the SubChannel id for further trace inquiry.
  1750  				subConn = k
  1751  				t.Logf("SubChannel Id is %d", subConn)
  1752  			}
  1753  		}
  1754  		scm := channelz.GetSubChannel(subConn)
  1755  		if scm == nil {
  1756  			return false, fmt.Errorf("subChannel should still exist due to parent's trace reference")
  1757  		}
  1758  		if scm.Trace == nil {
  1759  			return false, fmt.Errorf("trace for SubChannel should not be empty")
  1760  		}
  1761  		if len(scm.Trace.Events) == 0 {
  1762  			return false, fmt.Errorf("there should be at least one trace event for subChannel not 0")
  1763  		}
  1764  		var ready, connecting, transient, shutdown int
  1765  		t.Log("SubChannel trace events seen so far...")
  1766  		for _, e := range scm.Trace.Events {
  1767  			t.Log(e.Desc)
  1768  			if strings.Contains(e.Desc, fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.TransientFailure)) {
  1769  				transient++
  1770  			}
  1771  		}
  1772  		// Make sure the SubChannel has already seen transient failure before shutting it down through
  1773  		// r.UpdateState(resolver.State{Addresses:[]resolver.Address{}}).
  1774  		if transient == 0 {
  1775  			return false, fmt.Errorf("transient failure has not happened on SubChannel yet")
  1776  		}
  1777  		transient = 0
  1778  		r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "fake address"}}})
  1779  		t.Log("SubChannel trace events seen so far...")
  1780  		for _, e := range scm.Trace.Events {
  1781  			t.Log(e.Desc)
  1782  			if strings.Contains(e.Desc, fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.Ready)) {
  1783  				ready++
  1784  			}
  1785  			if strings.Contains(e.Desc, fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.Connecting)) {
  1786  				connecting++
  1787  			}
  1788  			if strings.Contains(e.Desc, fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.TransientFailure)) {
  1789  				transient++
  1790  			}
  1791  			if strings.Contains(e.Desc, fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.Shutdown)) {
  1792  				shutdown++
  1793  			}
  1794  		}
  1795  		// example:
  1796  		// Subchannel Created
  1797  		// Subchannel's connectivity state changed to CONNECTING
  1798  		// Subchannel picked a new address: "localhost:36011"
  1799  		// Subchannel's connectivity state changed to READY
  1800  		// Subchannel's connectivity state changed to TRANSIENT_FAILURE
  1801  		// Subchannel's connectivity state changed to CONNECTING
  1802  		// Subchannel picked a new address: "localhost:36011"
  1803  		// Subchannel's connectivity state changed to SHUTDOWN
  1804  		// Subchannel Deleted
  1805  		if ready != 1 || connecting < 1 || transient < 1 || shutdown != 1 {
  1806  			return false, fmt.Errorf("got: ready = %d, connecting = %d, transient = %d, shutdown = %d, want: 1, >=1, >=1, 1", ready, connecting, transient, shutdown)
  1807  		}
  1808  
  1809  		return true, nil
  1810  	}); err != nil {
  1811  		t.Fatal(err)
  1812  	}
  1813  }
  1814  
  1815  func (s) TestCZChannelConnectivityState(t *testing.T) {
  1816  	e := tcpClearRREnv
  1817  	te := newTest(t, e)
  1818  	te.startServer(&testServer{security: e.security})
  1819  	r := manual.NewBuilderWithScheme("whatever")
  1820  	r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: te.srvAddr}}})
  1821  	te.resolverScheme = r.Scheme()
  1822  	cc := te.clientConn(grpc.WithResolvers(r))
  1823  	defer te.tearDown()
  1824  	tc := testgrpc.NewTestServiceClient(cc)
  1825  	// make sure the connection is up
  1826  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1827  	defer cancel()
  1828  	if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil {
  1829  		t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
  1830  	}
  1831  	te.srv.Stop()
  1832  
  1833  	if err := verifyResultWithDelay(func() (bool, error) {
  1834  		tcs, _ := channelz.GetTopChannels(0, 0)
  1835  		if len(tcs) != 1 {
  1836  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
  1837  		}
  1838  
  1839  		var ready, connecting, transient int
  1840  		t.Log("Channel trace events seen so far...")
  1841  		for _, e := range tcs[0].Trace.Events {
  1842  			t.Log(e.Desc)
  1843  			if strings.Contains(e.Desc, fmt.Sprintf("Channel Connectivity change to %v", connectivity.Ready)) {
  1844  				ready++
  1845  			}
  1846  			if strings.Contains(e.Desc, fmt.Sprintf("Channel Connectivity change to %v", connectivity.Connecting)) {
  1847  				connecting++
  1848  			}
  1849  			if strings.Contains(e.Desc, fmt.Sprintf("Channel Connectivity change to %v", connectivity.TransientFailure)) {
  1850  				transient++
  1851  			}
  1852  		}
  1853  
  1854  		// example:
  1855  		// Channel Created
  1856  		// Adressses resolved (from empty address state): "localhost:40467"
  1857  		// SubChannel (id: 4[]) Created
  1858  		// Channel's connectivity state changed to CONNECTING
  1859  		// Channel's connectivity state changed to READY
  1860  		// Channel's connectivity state changed to TRANSIENT_FAILURE
  1861  		// Channel's connectivity state changed to CONNECTING
  1862  		// Channel's connectivity state changed to TRANSIENT_FAILURE
  1863  		if ready != 1 || connecting < 1 || transient < 1 {
  1864  			return false, fmt.Errorf("got: ready = %d, connecting = %d, transient = %d, want: 1, >=1, >=1", ready, connecting, transient)
  1865  		}
  1866  		return true, nil
  1867  	}); err != nil {
  1868  		t.Fatal(err)
  1869  	}
  1870  }
  1871  
  1872  func (s) TestCZTraceOverwriteChannelDeletion(t *testing.T) {
  1873  	e := tcpClearRREnv
  1874  	e.balancer = ""
  1875  	te := newTest(t, e)
  1876  	channelz.SetMaxTraceEntry(1)
  1877  	defer channelz.ResetMaxTraceEntryToDefault()
  1878  	r := manual.NewBuilderWithScheme("whatever")
  1879  	te.resolverScheme = r.Scheme()
  1880  	te.clientConn(grpc.WithResolvers(r))
  1881  	resolvedAddrs := []resolver.Address{{Addr: "127.0.0.1:0", ServerName: "grpclb.server"}}
  1882  	grpclbConfig := parseServiceConfig(t, r, `{"loadBalancingPolicy": "grpclb"}`)
  1883  	r.UpdateState(grpclbstate.Set(resolver.State{ServiceConfig: grpclbConfig}, &grpclbstate.State{BalancerAddresses: resolvedAddrs}))
  1884  	defer te.tearDown()
  1885  	var nestedConn int64
  1886  	if err := verifyResultWithDelay(func() (bool, error) {
  1887  		tcs, _ := channelz.GetTopChannels(0, 0)
  1888  		if len(tcs) != 1 {
  1889  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
  1890  		}
  1891  		if len(tcs[0].NestedChans) != 1 {
  1892  			return false, fmt.Errorf("there should be one nested channel from grpclb, not %d", len(tcs[0].NestedChans))
  1893  		}
  1894  		for k := range tcs[0].NestedChans {
  1895  			nestedConn = k
  1896  		}
  1897  		return true, nil
  1898  	}); err != nil {
  1899  		t.Fatal(err)
  1900  	}
  1901  
  1902  	r.UpdateState(resolver.State{
  1903  		Addresses:     []resolver.Address{{Addr: "127.0.0.1:0"}},
  1904  		ServiceConfig: parseServiceConfig(t, r, `{"loadBalancingPolicy": "round_robin"}`),
  1905  	})
  1906  
  1907  	// wait for the shutdown of grpclb balancer
  1908  	if err := verifyResultWithDelay(func() (bool, error) {
  1909  		tcs, _ := channelz.GetTopChannels(0, 0)
  1910  		if len(tcs) != 1 {
  1911  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
  1912  		}
  1913  		if len(tcs[0].NestedChans) != 0 {
  1914  			return false, fmt.Errorf("there should be 0 nested channel from grpclb, not %d", len(tcs[0].NestedChans))
  1915  		}
  1916  		return true, nil
  1917  	}); err != nil {
  1918  		t.Fatal(err)
  1919  	}
  1920  
  1921  	// If nested channel deletion is last trace event before the next validation, it will fail, as the top channel will hold a reference to it.
  1922  	// This line forces a trace event on the top channel in that case.
  1923  	r.UpdateState(resolver.State{
  1924  		Addresses:     []resolver.Address{{Addr: "127.0.0.1:0"}},
  1925  		ServiceConfig: parseServiceConfig(t, r, `{"loadBalancingPolicy": "round_robin"}`),
  1926  	})
  1927  
  1928  	// verify that the nested channel no longer exist due to trace referencing it got overwritten.
  1929  	if err := verifyResultWithDelay(func() (bool, error) {
  1930  		cm := channelz.GetChannel(nestedConn)
  1931  		if cm != nil {
  1932  			return false, fmt.Errorf("nested channel should have been deleted since its parent's trace should not contain any reference to it anymore")
  1933  		}
  1934  		return true, nil
  1935  	}); err != nil {
  1936  		t.Fatal(err)
  1937  	}
  1938  }
  1939  
  1940  func (s) TestCZTraceOverwriteSubChannelDeletion(t *testing.T) {
  1941  	e := tcpClearRREnv
  1942  	te := newTest(t, e)
  1943  	channelz.SetMaxTraceEntry(1)
  1944  	defer channelz.ResetMaxTraceEntryToDefault()
  1945  	te.startServer(&testServer{security: e.security})
  1946  	r := manual.NewBuilderWithScheme("whatever")
  1947  	r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: te.srvAddr}}})
  1948  	te.resolverScheme = r.Scheme()
  1949  	te.clientConn(grpc.WithResolvers(r))
  1950  	defer te.tearDown()
  1951  	var subConn int64
  1952  	// Here, we just wait for all sockets to be up. In the future, if we implement
  1953  	// IDLE, we may need to make several rpc calls to create the sockets.
  1954  	if err := verifyResultWithDelay(func() (bool, error) {
  1955  		tcs, _ := channelz.GetTopChannels(0, 0)
  1956  		if len(tcs) != 1 {
  1957  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
  1958  		}
  1959  		if len(tcs[0].SubChans) != 1 {
  1960  			return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans))
  1961  		}
  1962  		for k := range tcs[0].SubChans {
  1963  			subConn = k
  1964  		}
  1965  		return true, nil
  1966  	}); err != nil {
  1967  		t.Fatal(err)
  1968  	}
  1969  
  1970  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1971  	defer cancel()
  1972  	testutils.AwaitState(ctx, t, te.cc, connectivity.Ready)
  1973  	r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "fake address"}}})
  1974  	testutils.AwaitNotState(ctx, t, te.cc, connectivity.Ready)
  1975  
  1976  	// verify that the subchannel no longer exist due to trace referencing it got overwritten.
  1977  	if err := verifyResultWithDelay(func() (bool, error) {
  1978  		cm := channelz.GetChannel(subConn)
  1979  		if cm != nil {
  1980  			return false, fmt.Errorf("subchannel should have been deleted since its parent's trace should not contain any reference to it anymore")
  1981  		}
  1982  		return true, nil
  1983  	}); err != nil {
  1984  		t.Fatal(err)
  1985  	}
  1986  }
  1987  
  1988  func (s) TestCZTraceTopChannelDeletionTraceClear(t *testing.T) {
  1989  	e := tcpClearRREnv
  1990  	te := newTest(t, e)
  1991  	te.startServer(&testServer{security: e.security})
  1992  	r := manual.NewBuilderWithScheme("whatever")
  1993  	r.InitialState(resolver.State{Addresses: []resolver.Address{{Addr: te.srvAddr}}})
  1994  	te.resolverScheme = r.Scheme()
  1995  	te.clientConn(grpc.WithResolvers(r))
  1996  	var subConn int64
  1997  	// Here, we just wait for all sockets to be up. In the future, if we implement
  1998  	// IDLE, we may need to make several rpc calls to create the sockets.
  1999  	if err := verifyResultWithDelay(func() (bool, error) {
  2000  		tcs, _ := channelz.GetTopChannels(0, 0)
  2001  		if len(tcs) != 1 {
  2002  			return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs))
  2003  		}
  2004  		if len(tcs[0].SubChans) != 1 {
  2005  			return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans))
  2006  		}
  2007  		for k := range tcs[0].SubChans {
  2008  			subConn = k
  2009  		}
  2010  		return true, nil
  2011  	}); err != nil {
  2012  		t.Fatal(err)
  2013  	}
  2014  	te.tearDown()
  2015  	// verify that the subchannel no longer exist due to parent channel got deleted and its trace cleared.
  2016  	if err := verifyResultWithDelay(func() (bool, error) {
  2017  		cm := channelz.GetChannel(subConn)
  2018  		if cm != nil {
  2019  			return false, fmt.Errorf("subchannel should have been deleted since its parent's trace should not contain any reference to it anymore")
  2020  		}
  2021  		return true, nil
  2022  	}); err != nil {
  2023  		t.Fatal(err)
  2024  	}
  2025  }