github.com/lfch/etcd-io/tests/v3@v3.0.0-20221004140520-eac99acd3e9d/framework/integration/cluster.go

     1  // Copyright 2016 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package integration
    16  
    17  import (
    18  	"context"
    19  	"crypto/tls"
    20  	"errors"
    21  	"fmt"
    22  	"io"
    23  	"log"
    24  	"math/rand"
    25  	"net"
    26  	"net/http"
    27  	"net/http/httptest"
    28  	"os"
    29  	"reflect"
    30  	"sort"
    31  	"strings"
    32  	"sync"
    33  	"sync/atomic"
    34  	"testing"
    35  	"time"
    36  
    37  	pb "github.com/lfch/etcd-io/api/v3/etcdserverpb"
    38  	"github.com/lfch/etcd-io/client/pkg/v3/testutil"
    39  	"github.com/lfch/etcd-io/client/pkg/v3/tlsutil"
    40  	"github.com/lfch/etcd-io/client/pkg/v3/transport"
    41  	"github.com/lfch/etcd-io/client/pkg/v3/types"
    42  	clientv3 "github.com/lfch/etcd-io/client/v3"
    43  	"github.com/lfch/etcd-io/pkg/v3/grpc_testing"
    44  	"github.com/lfch/etcd-io/raft/v3"
    45  	"github.com/lfch/etcd-io/server/v3/config"
    46  	"github.com/lfch/etcd-io/server/v3/embed"
    47  	"github.com/lfch/etcd-io/server/v3/etcdserver"
    48  	"github.com/lfch/etcd-io/server/v3/etcdserver/api/etcdhttp"
    49  	"github.com/lfch/etcd-io/server/v3/etcdserver/api/membership"
    50  	"github.com/lfch/etcd-io/server/v3/etcdserver/api/rafthttp"
    51  	"github.com/lfch/etcd-io/server/v3/etcdserver/api/v3client"
    52  	"github.com/lfch/etcd-io/server/v3/etcdserver/api/v3election"
    53  	epb "github.com/lfch/etcd-io/server/v3/etcdserver/api/v3election/v3electionpb"
    54  	"github.com/lfch/etcd-io/server/v3/etcdserver/api/v3lock"
    55  	lockpb "github.com/lfch/etcd-io/server/v3/etcdserver/api/v3lock/v3lockpb"
    56  	"github.com/lfch/etcd-io/server/v3/etcdserver/api/v3rpc"
    57  	"github.com/lfch/etcd-io/server/v3/verify"
    58  	framecfg "github.com/lfch/etcd-io/tests/v3/framework/config"
    59  	"go.uber.org/zap/zapcore"
    60  	"go.uber.org/zap/zaptest"
    61  
    62  	"github.com/google/go-cmp/cmp"
    63  	"github.com/google/go-cmp/cmp/cmpopts"
    64  	"github.com/soheilhy/cmux"
    65  	"go.uber.org/zap"
    66  	"golang.org/x/crypto/bcrypt"
    67  	"google.golang.org/grpc"
    68  	"google.golang.org/grpc/keepalive"
    69  )
    70  
    71  const (
    72  	// RequestWaitTimeout is the time duration to wait for a request to go through or detect leader loss.
    73  	RequestWaitTimeout = 5 * time.Second
    74  	RequestTimeout     = 20 * time.Second
    75  
    76  	ClusterName  = "etcd"
    77  	BasePort     = 21000
    78  	URLScheme    = "unix"
    79  	URLSchemeTLS = "unixs"
    80  	BaseGRPCPort = 30000
    81  )
    82  
    83  var (
    84  	ElectionTicks = 10
    85  
    86  	// LocalListenCount counts the listeners created so far. Integration tests use unique,
    87  	// increasing ports per member, ensuring restarted members can listen on the same port again.
    88  	LocalListenCount = int32(0)
    89  
    90  	TestTLSInfo = transport.TLSInfo{
    91  		KeyFile:        MustAbsPath("../fixtures/server.key.insecure"),
    92  		CertFile:       MustAbsPath("../fixtures/server.crt"),
    93  		TrustedCAFile:  MustAbsPath("../fixtures/ca.crt"),
    94  		ClientCertAuth: true,
    95  	}
    96  
    97  	TestTLSInfoWithSpecificUsage = transport.TLSInfo{
    98  		KeyFile:        MustAbsPath("../fixtures/server-serverusage.key.insecure"),
    99  		CertFile:       MustAbsPath("../fixtures/server-serverusage.crt"),
   100  		ClientKeyFile:  MustAbsPath("../fixtures/client-clientusage.key.insecure"),
   101  		ClientCertFile: MustAbsPath("../fixtures/client-clientusage.crt"),
   102  		TrustedCAFile:  MustAbsPath("../fixtures/ca.crt"),
   103  		ClientCertAuth: true,
   104  	}
   105  
   106  	TestTLSInfoIP = transport.TLSInfo{
   107  		KeyFile:        MustAbsPath("../fixtures/server-ip.key.insecure"),
   108  		CertFile:       MustAbsPath("../fixtures/server-ip.crt"),
   109  		TrustedCAFile:  MustAbsPath("../fixtures/ca.crt"),
   110  		ClientCertAuth: true,
   111  	}
   112  
   113  	TestTLSInfoExpired = transport.TLSInfo{
   114  		KeyFile:        MustAbsPath("./fixtures-expired/server.key.insecure"),
   115  		CertFile:       MustAbsPath("./fixtures-expired/server.crt"),
   116  		TrustedCAFile:  MustAbsPath("./fixtures-expired/ca.crt"),
   117  		ClientCertAuth: true,
   118  	}
   119  
   120  	TestTLSInfoExpiredIP = transport.TLSInfo{
   121  		KeyFile:        MustAbsPath("./fixtures-expired/server-ip.key.insecure"),
   122  		CertFile:       MustAbsPath("./fixtures-expired/server-ip.crt"),
   123  		TrustedCAFile:  MustAbsPath("./fixtures-expired/ca.crt"),
   124  		ClientCertAuth: true,
   125  	}
   126  
   127  	DefaultTokenJWT = fmt.Sprintf("jwt,pub-key=%s,priv-key=%s,sign-method=RS256,ttl=1s",
   128  		MustAbsPath("../fixtures/server.crt"), MustAbsPath("../fixtures/server.key.insecure"))
   129  
   130  	// UniqueNumber is used to generate unique port numbers.
   131  	// It should only be accessed via atomic package methods.
   132  	UniqueNumber int32
   133  )
   134  
   135  type ClusterConfig struct {
   136  	Size      int
   137  	PeerTLS   *transport.TLSInfo
   138  	ClientTLS *transport.TLSInfo
   139  
   140  	DiscoveryURL string
   141  
   142  	AuthToken    string
   143  	AuthTokenTTL uint
   144  
   145  	QuotaBackendBytes int64
   146  
   147  	MaxTxnOps              uint
   148  	MaxRequestBytes        uint
   149  	SnapshotCount          uint64
   150  	SnapshotCatchUpEntries uint64
   151  
   152  	GRPCKeepAliveMinTime  time.Duration
   153  	GRPCKeepAliveInterval time.Duration
   154  	GRPCKeepAliveTimeout  time.Duration
   155  
   156  	ClientMaxCallSendMsgSize int
   157  	ClientMaxCallRecvMsgSize int
   158  
   159  	// UseIP is true to use only IP for gRPC requests.
   160  	UseIP bool
   161  	// UseBridge adds a bridge between the client and the grpc server. It should be used in tests
   162  	// that manipulate the connection, or that require connections to survive a server stop/restart.
   163  	UseBridge bool
   164  	// UseTCP configures the server to listen on a TCP socket. If disabled, a unix socket is used.
   165  	UseTCP bool
   166  
   167  	EnableLeaseCheckpoint   bool
   168  	LeaseCheckpointInterval time.Duration
   169  	LeaseCheckpointPersist  bool
   170  
   171  	WatchProgressNotifyInterval time.Duration
   172  	ExperimentalMaxLearners     int
   173  	DisableStrictReconfigCheck  bool
   174  	CorruptCheckTime            time.Duration
   175  }
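
        // A minimal usage sketch (hypothetical, not part of this file): most tests set
        // only Size and rely on zero values for the rest, which MustNewMember replaces
        // with the embed/etcdserver defaults.
        //
        //	cfg := &ClusterConfig{
        //		Size:      3,
        //		UseBridge: true, // lets tests manipulate or cut client connections
        //	}
        //	clus := NewCluster(t, cfg)
        //	defer clus.Terminate(t)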
   176  
   177  type Cluster struct {
   178  	Cfg           *ClusterConfig
   179  	Members       []*Member
   180  	LastMemberNum int
   181  
   182  	mu sync.Mutex
   183  }
   184  
   185  func SchemeFromTLSInfo(tls *transport.TLSInfo) string {
   186  	if tls == nil {
   187  		return URLScheme
   188  	}
   189  	return URLSchemeTLS
   190  }
   191  
   192  func (c *Cluster) fillClusterForMembers() error {
   193  	if c.Cfg.DiscoveryURL != "" {
   194  		// Cluster will be discovered
   195  		return nil
   196  	}
   197  
   198  	addrs := make([]string, 0)
   199  	for _, m := range c.Members {
   200  		scheme := SchemeFromTLSInfo(m.PeerTLSInfo)
   201  		for _, l := range m.PeerListeners {
   202  			addrs = append(addrs, fmt.Sprintf("%s=%s://%s", m.Name, scheme, l.Addr().String()))
   203  		}
   204  	}
   205  	clusterStr := strings.Join(addrs, ",")
   206  	var err error
   207  	for _, m := range c.Members {
   208  		m.InitialPeerURLsMap, err = types.NewURLsMap(clusterStr)
   209  		if err != nil {
   210  			return err
   211  		}
   212  	}
   213  	return nil
   214  }
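
        // The resulting initial-cluster string uses the usual etcd form of comma-separated
        // name=URL pairs; with unix sockets it looks roughly like (illustrative addresses):
        //
        //	m0=unix://127.0.0.1:2100100042,m1=unix://127.0.0.1:2100200042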
   215  
   216  func (c *Cluster) Launch(t testutil.TB) {
   217  	t.Logf("Launching new cluster...")
   218  	errc := make(chan error)
   219  	for _, m := range c.Members {
   220  		// Members are launched in separate goroutines because, if they boot
   221  		// using a discovery URL, they have to wait for others to register before continuing.
   222  		go func(m *Member) {
   223  			errc <- m.Launch()
   224  		}(m)
   225  	}
   226  	for range c.Members {
   227  		if err := <-errc; err != nil {
   228  			c.Terminate(t)
   229  			t.Fatalf("error setting up member: %v", err)
   230  		}
   231  	}
   232  	// wait for the Cluster to be stable so it can receive future client requests
   233  	c.WaitMembersMatch(t, c.ProtoMembers())
   234  	c.waitVersion()
   235  	for _, m := range c.Members {
   236  		t.Logf(" - %v -> %v (%v)", m.Name, m.ID(), m.GRPCURL())
   237  	}
   238  }
   239  
   240  // ProtoMembers returns a list of all active members as protobuf pb.Member messages.
   241  func (c *Cluster) ProtoMembers() []*pb.Member {
   242  	var ms []*pb.Member
   243  	for _, m := range c.Members {
   244  		pScheme := SchemeFromTLSInfo(m.PeerTLSInfo)
   245  		cScheme := SchemeFromTLSInfo(m.ClientTLSInfo)
   246  		cm := &pb.Member{Name: m.Name}
   247  		for _, ln := range m.PeerListeners {
   248  			cm.PeerURLs = append(cm.PeerURLs, pScheme+"://"+ln.Addr().String())
   249  		}
   250  		for _, ln := range m.ClientListeners {
   251  			cm.ClientURLs = append(cm.ClientURLs, cScheme+"://"+ln.Addr().String())
   252  		}
   253  		ms = append(ms, cm)
   254  	}
   255  	return ms
   256  }
   257  
   258  func (c *Cluster) mustNewMember(t testutil.TB) *Member {
   259  	memberNumber := c.LastMemberNum
   260  	c.LastMemberNum++
   261  	m := MustNewMember(t,
   262  		MemberConfig{
   263  			Name:                        fmt.Sprintf("m%v", memberNumber),
   264  			MemberNumber:                memberNumber,
   265  			AuthToken:                   c.Cfg.AuthToken,
   266  			AuthTokenTTL:                c.Cfg.AuthTokenTTL,
   267  			PeerTLS:                     c.Cfg.PeerTLS,
   268  			ClientTLS:                   c.Cfg.ClientTLS,
   269  			QuotaBackendBytes:           c.Cfg.QuotaBackendBytes,
   270  			MaxTxnOps:                   c.Cfg.MaxTxnOps,
   271  			MaxRequestBytes:             c.Cfg.MaxRequestBytes,
   272  			SnapshotCount:               c.Cfg.SnapshotCount,
   273  			SnapshotCatchUpEntries:      c.Cfg.SnapshotCatchUpEntries,
   274  			GrpcKeepAliveMinTime:        c.Cfg.GRPCKeepAliveMinTime,
   275  			GrpcKeepAliveInterval:       c.Cfg.GRPCKeepAliveInterval,
   276  			GrpcKeepAliveTimeout:        c.Cfg.GRPCKeepAliveTimeout,
   277  			ClientMaxCallSendMsgSize:    c.Cfg.ClientMaxCallSendMsgSize,
   278  			ClientMaxCallRecvMsgSize:    c.Cfg.ClientMaxCallRecvMsgSize,
   279  			UseIP:                       c.Cfg.UseIP,
   280  			UseBridge:                   c.Cfg.UseBridge,
   281  			UseTCP:                      c.Cfg.UseTCP,
   282  			EnableLeaseCheckpoint:       c.Cfg.EnableLeaseCheckpoint,
   283  			LeaseCheckpointInterval:     c.Cfg.LeaseCheckpointInterval,
   284  			LeaseCheckpointPersist:      c.Cfg.LeaseCheckpointPersist,
   285  			WatchProgressNotifyInterval: c.Cfg.WatchProgressNotifyInterval,
   286  			ExperimentalMaxLearners:     c.Cfg.ExperimentalMaxLearners,
   287  			DisableStrictReconfigCheck:  c.Cfg.DisableStrictReconfigCheck,
   288  			CorruptCheckTime:            c.Cfg.CorruptCheckTime,
   289  		})
   290  	m.DiscoveryURL = c.Cfg.DiscoveryURL
   291  	return m
   292  }
   293  
   294  // addMember returns the PeerURLs of the added member.
   295  func (c *Cluster) addMember(t testutil.TB) types.URLs {
   296  	m := c.mustNewMember(t)
   297  
   298  	scheme := SchemeFromTLSInfo(c.Cfg.PeerTLS)
   299  
   300  	// send add request to the Cluster
   301  	var err error
   302  	for i := 0; i < len(c.Members); i++ {
   303  		peerURL := scheme + "://" + m.PeerListeners[0].Addr().String()
   304  		if err = c.AddMemberByURL(t, c.Members[i].Client, peerURL); err == nil {
   305  			break
   306  		}
   307  	}
   308  	if err != nil {
   309  		t.Fatalf("add member failed on all members, error: %v", err)
   310  	}
   311  
   312  	m.InitialPeerURLsMap = types.URLsMap{}
   313  	for _, mm := range c.Members {
   314  		m.InitialPeerURLsMap[mm.Name] = mm.PeerURLs
   315  	}
   316  	m.InitialPeerURLsMap[m.Name] = m.PeerURLs
   317  	m.NewCluster = false
   318  	if err := m.Launch(); err != nil {
   319  		t.Fatal(err)
   320  	}
   321  	c.Members = append(c.Members, m)
   322  	// wait for the Cluster to be stable so it can receive future client requests
   323  	c.WaitMembersMatch(t, c.ProtoMembers())
   324  	return m.PeerURLs
   325  }
   326  
   327  func (c *Cluster) AddMemberByURL(t testutil.TB, cc *clientv3.Client, peerURL string) error {
   328  	ctx, cancel := context.WithTimeout(context.Background(), RequestTimeout)
   329  	_, err := cc.MemberAdd(ctx, []string{peerURL})
   330  	cancel()
   331  	if err != nil {
   332  		return err
   333  	}
   334  
   335  	// wait for the add-node entry to be applied in the Cluster
   336  	members := append(c.ProtoMembers(), &pb.Member{PeerURLs: []string{peerURL}, ClientURLs: []string{}})
   337  	c.WaitMembersMatch(t, members)
   338  	return nil
   339  }
   340  
   341  // AddMember returns the PeerURLs of the added member.
   342  func (c *Cluster) AddMember(t testutil.TB) types.URLs {
   343  	return c.addMember(t)
   344  }
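
        // Hypothetical sketch of growing a cluster in a test:
        //
        //	clus := NewCluster(t, &ClusterConfig{Size: 1})
        //	defer clus.Terminate(t)
        //	clus.AddMember(t) // blocks until all members agree on the new membership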
   345  
   346  func (c *Cluster) RemoveMember(t testutil.TB, cc *clientv3.Client, id uint64) error {
   347  	// send remove request to the Cluster
   348  
   349  	ctx, cancel := context.WithTimeout(context.Background(), RequestTimeout)
   350  	_, err := cc.MemberRemove(ctx, id)
   351  	cancel()
   352  	if err != nil {
   353  		return err
   354  	}
   355  	newMembers := make([]*Member, 0)
   356  	for _, m := range c.Members {
   357  		if uint64(m.Server.MemberId()) != id {
   358  			newMembers = append(newMembers, m)
   359  		} else {
   360  			m.Client.Close()
   361  			select {
   362  			case <-m.Server.StopNotify():
   363  				m.Terminate(t)
   364  			// 1s stop delay + election timeout + 1s disk and network delay + connection write timeout
   365  			// TODO: remove connection write timeout by selecting on http response closeNotifier
   366  			// blocking on https://github.com/golang/go/issues/9524
   367  			case <-time.After(time.Second + time.Duration(ElectionTicks)*framecfg.TickDuration + time.Second + rafthttp.ConnWriteTimeout):
   368  				t.Fatalf("failed to remove member %s in time", m.Server.MemberId())
   369  			}
   370  		}
   371  	}
   372  	c.Members = newMembers
   373  	c.WaitMembersMatch(t, c.ProtoMembers())
   374  	return nil
   375  }
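
        // Hypothetical usage: remove the current leader through another member's client.
        //
        //	lead := clus.WaitLeader(t)
        //	peer := clus.Members[(lead+1)%len(clus.Members)]
        //	id := uint64(clus.Members[lead].Server.MemberId())
        //	if err := clus.RemoveMember(t, peer.Client, id); err != nil {
        //		t.Fatal(err)
        //	}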
   376  
   377  func (c *Cluster) WaitMembersMatch(t testutil.TB, membs []*pb.Member) {
   378  	ctx, cancel := context.WithTimeout(context.Background(), RequestTimeout)
   379  	defer cancel()
   380  	for _, m := range c.Members {
   381  		cc := ToGRPC(m.Client)
   382  		select {
   383  		case <-m.Server.StopNotify():
   384  			continue
   385  		default:
   386  		}
   387  		for {
   388  			resp, err := cc.Cluster.MemberList(ctx, &pb.MemberListRequest{Linearizable: false})
   389  			if errors.Is(err, context.DeadlineExceeded) {
   390  				t.Fatal(err)
   391  			}
   392  			if err != nil {
   393  				continue
   394  			}
   395  			if isMembersEqual(resp.Members, membs) {
   396  				break
   397  			}
   398  			time.Sleep(framecfg.TickDuration)
   399  		}
   400  	}
   401  }
   402  
   403  // WaitLeader returns the index of the member in c.Members that is leader,
   404  // or fails the test if a leader is not established within 30s.
   405  func (c *Cluster) WaitLeader(t testing.TB) int {
   406  	return c.WaitMembersForLeader(t, c.Members)
   407  }
   408  
   409  // WaitMembersForLeader waits until the given members agree on the same leader,
   410  // and returns its 'index' in the 'membs' list
   411  func (c *Cluster) WaitMembersForLeader(t testing.TB, membs []*Member) int {
   412  	t.Logf("WaitMembersForLeader")
   413  	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
   414  	defer cancel()
   415  	l := 0
   416  	for l = c.waitMembersForLeader(ctx, t, membs); l < 0; {
   417  		if ctx.Err() != nil {
   418  			t.Fatalf("WaitLeader FAILED: %v", ctx.Err())
   419  		}
   420  	}
   421  	t.Logf("WaitMembersForLeader succeeded. Cluster leader index: %v", l)
   422  
   423  	// TODO: Consider second pass check as sometimes leadership is lost
   424  	// soon after election:
   425  	//
   426  	// We perform multiple attempts, as sometimes just after a successful WaitLeader
   427  	// there is a race and leadership is quickly lost:
   428  	//   - MsgAppResp message with higher term from 2acc3d3b521981 [term: 3]	{"member": "m0"}
   429  	//   - 9903a56eaf96afac became follower at term 3	{"member": "m0"}
   430  	//   - 9903a56eaf96afac lost leader 9903a56eaf96afac at term 3	{"member": "m0"}
   431  
   432  	return l
   433  }
   434  
   435  // waitMembersForLeader waits until the given members agree on the same leader,
   436  // and returns its 'index' in the 'membs' list
   437  func (c *Cluster) waitMembersForLeader(ctx context.Context, t testing.TB, membs []*Member) int {
   438  	possibleLead := make(map[uint64]bool)
   439  	var lead uint64
   440  	for _, m := range membs {
   441  		possibleLead[uint64(m.Server.MemberId())] = true
   442  	}
   443  	cc, err := c.ClusterClient(t)
   444  	if err != nil {
   445  		t.Fatal(err)
   446  	}
   447  	// ensure leader is up via linearizable get
   448  	for {
   449  		ctx, cancel := context.WithTimeout(ctx, 10*framecfg.TickDuration+time.Second)
   450  		_, err := cc.Get(ctx, "0")
   451  		cancel()
   452  		if err == nil || strings.Contains(err.Error(), "Key not found") {
   453  			break
   454  		}
   455  	}
   456  
   457  	for lead == 0 || !possibleLead[lead] {
   458  		lead = 0
   459  		for _, m := range membs {
   460  			select {
   461  			case <-m.Server.StopNotify():
   462  				continue
   463  			default:
   464  			}
   465  			if lead != 0 && lead != m.Server.Lead() {
   466  				lead = 0
   467  				time.Sleep(10 * framecfg.TickDuration)
   468  				break
   469  			}
   470  			lead = m.Server.Lead()
   471  		}
   472  	}
   473  
   474  	for i, m := range membs {
   475  		if uint64(m.Server.MemberId()) == lead {
   476  			t.Logf("waitMembersForLeader found leader. Member: %v lead: %x", i, lead)
   477  			return i
   478  		}
   479  	}
   480  
   481  	t.Logf("waitMembersForLeader failed (-1)")
   482  	return -1
   483  }
   484  
   485  func (c *Cluster) WaitNoLeader() { c.WaitMembersNoLeader(c.Members) }
   486  
   487  // WaitMembersNoLeader waits until the given members lose their leader.
   488  func (c *Cluster) WaitMembersNoLeader(membs []*Member) {
   489  	noLeader := false
   490  	for !noLeader {
   491  		noLeader = true
   492  		for _, m := range membs {
   493  			select {
   494  			case <-m.Server.StopNotify():
   495  				continue
   496  			default:
   497  			}
   498  			if m.Server.Lead() != 0 {
   499  				noLeader = false
   500  				time.Sleep(10 * framecfg.TickDuration)
   501  				break
   502  			}
   503  		}
   504  	}
   505  }
   506  
   507  func (c *Cluster) waitVersion() {
   508  	for _, m := range c.Members {
   509  		for {
   510  			if m.Server.ClusterVersion() != nil {
   511  				break
   512  			}
   513  			time.Sleep(framecfg.TickDuration)
   514  		}
   515  	}
   516  }
   517  
   518  // isMembersEqual checks whether two member lists are equal, ignoring the ID,
   519  // PeerURLs, and ClientURLs fields. The given wmembs should leave the ID field unset.
   520  func isMembersEqual(membs []*pb.Member, wmembs []*pb.Member) bool {
   521  	sort.Sort(SortableMemberSliceByPeerURLs(membs))
   522  	sort.Sort(SortableMemberSliceByPeerURLs(wmembs))
   523  	return cmp.Equal(membs, wmembs, cmpopts.IgnoreFields(pb.Member{}, "ID", "PeerURLs", "ClientURLs"))
   524  }
   525  
   526  func NewLocalListener(t testutil.TB) net.Listener {
   527  	c := atomic.AddInt32(&LocalListenCount, 1)
   528  	// Go 1.8+ allows only numbers in port
   529  	addr := fmt.Sprintf("127.0.0.1:%05d%05d", c+BasePort, os.Getpid())
   530  	return NewListenerWithAddr(t, addr)
   531  }
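
        // The address concatenates the zero-padded counter+BasePort with the zero-padded
        // pid: for example (illustrative), the 7th listener in a process with pid 42
        // yields "127.0.0.1:2100700042".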
   532  
   533  func NewListenerWithAddr(t testutil.TB, addr string) net.Listener {
   534  	t.Logf("Creating listener with addr: %v", addr)
   535  	l, err := transport.NewUnixListener(addr)
   536  	if err != nil {
   537  		t.Fatal(err)
   538  	}
   539  	return l
   540  }
   541  
   542  type Member struct {
   543  	config.ServerConfig
   544  	UniqNumber                     int
   545  	MemberNumber                   int
   546  	PeerListeners, ClientListeners []net.Listener
   547  	GrpcListener                   net.Listener
   548  	// PeerTLSInfo enables peer TLS when set
   549  	PeerTLSInfo *transport.TLSInfo
   550  	// ClientTLSInfo enables client TLS when set
   551  	ClientTLSInfo *transport.TLSInfo
   552  	DialOptions   []grpc.DialOption
   553  
   554  	RaftHandler   *testutil.PauseableHandler
   555  	Server        *etcdserver.EtcdServer
   556  	ServerClosers []func()
   557  
   558  	GrpcServerOpts []grpc.ServerOption
   559  	GrpcServer     *grpc.Server
   560  	GrpcURL        string
   561  	GrpcBridge     *bridge
   562  
   563  	// ServerClient is a clientv3 that directly calls the etcdserver.
   564  	ServerClient *clientv3.Client
   565  	// Client is a clientv3 that communicates via socket, either UNIX or TCP.
   566  	Client *clientv3.Client
   567  
   568  	KeepDataDirTerminate     bool
   569  	ClientMaxCallSendMsgSize int
   570  	ClientMaxCallRecvMsgSize int
   571  	UseIP                    bool
   572  	UseBridge                bool
   573  	UseTCP                   bool
   574  
   575  	IsLearner bool
   576  	Closed    bool
   577  
   578  	GrpcServerRecorder *grpc_testing.GrpcRecorder
   579  }
   580  
   581  func (m *Member) GRPCURL() string { return m.GrpcURL }
   582  
   583  type MemberConfig struct {
   584  	Name                        string
   585  	UniqNumber                  int64
   586  	MemberNumber                int
   587  	PeerTLS                     *transport.TLSInfo
   588  	ClientTLS                   *transport.TLSInfo
   589  	AuthToken                   string
   590  	AuthTokenTTL                uint
   591  	QuotaBackendBytes           int64
   592  	MaxTxnOps                   uint
   593  	MaxRequestBytes             uint
   594  	SnapshotCount               uint64
   595  	SnapshotCatchUpEntries      uint64
   596  	GrpcKeepAliveMinTime        time.Duration
   597  	GrpcKeepAliveInterval       time.Duration
   598  	GrpcKeepAliveTimeout        time.Duration
   599  	ClientMaxCallSendMsgSize    int
   600  	ClientMaxCallRecvMsgSize    int
   601  	UseIP                       bool
   602  	UseBridge                   bool
   603  	UseTCP                      bool
   604  	EnableLeaseCheckpoint       bool
   605  	LeaseCheckpointInterval     time.Duration
   606  	LeaseCheckpointPersist      bool
   607  	WatchProgressNotifyInterval time.Duration
   608  	ExperimentalMaxLearners     int
   609  	DisableStrictReconfigCheck  bool
   610  	CorruptCheckTime            time.Duration
   611  }
   612  
   613  // MustNewMember returns an initialized member with the given name. If peerTLS is
   614  // set, it will use the https scheme to communicate between peers.
   615  func MustNewMember(t testutil.TB, mcfg MemberConfig) *Member {
   616  	var err error
   617  	m := &Member{
   618  		MemberNumber: mcfg.MemberNumber,
   619  		UniqNumber:   int(atomic.AddInt32(&LocalListenCount, 1)),
   620  	}
   621  
   622  	peerScheme := SchemeFromTLSInfo(mcfg.PeerTLS)
   623  	clientScheme := SchemeFromTLSInfo(mcfg.ClientTLS)
   624  
   625  	pln := NewLocalListener(t)
   626  	m.PeerListeners = []net.Listener{pln}
   627  	m.PeerURLs, err = types.NewURLs([]string{peerScheme + "://" + pln.Addr().String()})
   628  	if err != nil {
   629  		t.Fatal(err)
   630  	}
   631  	m.PeerTLSInfo = mcfg.PeerTLS
   632  
   633  	cln := NewLocalListener(t)
   634  	m.ClientListeners = []net.Listener{cln}
   635  	m.ClientURLs, err = types.NewURLs([]string{clientScheme + "://" + cln.Addr().String()})
   636  	if err != nil {
   637  		t.Fatal(err)
   638  	}
   639  	m.ClientTLSInfo = mcfg.ClientTLS
   640  
   641  	m.Name = mcfg.Name
   642  
   643  	m.DataDir, err = os.MkdirTemp(t.TempDir(), "etcd")
   644  	if err != nil {
   645  		t.Fatal(err)
   646  	}
   647  	clusterStr := fmt.Sprintf("%s=%s://%s", mcfg.Name, peerScheme, pln.Addr().String())
   648  	m.InitialPeerURLsMap, err = types.NewURLsMap(clusterStr)
   649  	if err != nil {
   650  		t.Fatal(err)
   651  	}
   652  	m.InitialClusterToken = ClusterName
   653  	m.NewCluster = true
   654  	m.BootstrapTimeout = 10 * time.Millisecond
   655  	if m.PeerTLSInfo != nil {
   656  		m.ServerConfig.PeerTLSInfo = *m.PeerTLSInfo
   657  	}
   658  	m.ElectionTicks = ElectionTicks
   659  	m.InitialElectionTickAdvance = true
   660  	m.TickMs = uint(framecfg.TickDuration / time.Millisecond)
   661  	m.QuotaBackendBytes = mcfg.QuotaBackendBytes
   662  	m.MaxTxnOps = mcfg.MaxTxnOps
   663  	if m.MaxTxnOps == 0 {
   664  		m.MaxTxnOps = embed.DefaultMaxTxnOps
   665  	}
   666  	m.MaxRequestBytes = mcfg.MaxRequestBytes
   667  	if m.MaxRequestBytes == 0 {
   668  		m.MaxRequestBytes = embed.DefaultMaxRequestBytes
   669  	}
   670  	m.SnapshotCount = etcdserver.DefaultSnapshotCount
   671  	if mcfg.SnapshotCount != 0 {
   672  		m.SnapshotCount = mcfg.SnapshotCount
   673  	}
   674  	m.SnapshotCatchUpEntries = etcdserver.DefaultSnapshotCatchUpEntries
   675  	if mcfg.SnapshotCatchUpEntries != 0 {
   676  		m.SnapshotCatchUpEntries = mcfg.SnapshotCatchUpEntries
   677  	}
   678  
   679  	// for the purpose of integration testing, a simple token is enough
   680  	m.AuthToken = "simple"
   681  	if mcfg.AuthToken != "" {
   682  		m.AuthToken = mcfg.AuthToken
   683  	}
   684  	if mcfg.AuthTokenTTL != 0 {
   685  		m.TokenTTL = mcfg.AuthTokenTTL
   686  	}
   687  
   688  	m.BcryptCost = uint(bcrypt.MinCost) // use min bcrypt cost to speed up integration testing
   689  
   690  	m.GrpcServerOpts = []grpc.ServerOption{}
   691  	if mcfg.GrpcKeepAliveMinTime > time.Duration(0) {
   692  		m.GrpcServerOpts = append(m.GrpcServerOpts, grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{
   693  			MinTime:             mcfg.GrpcKeepAliveMinTime,
   694  			PermitWithoutStream: false,
   695  		}))
   696  	}
   697  	if mcfg.GrpcKeepAliveInterval > time.Duration(0) &&
   698  		mcfg.GrpcKeepAliveTimeout > time.Duration(0) {
   699  		m.GrpcServerOpts = append(m.GrpcServerOpts, grpc.KeepaliveParams(keepalive.ServerParameters{
   700  			Time:    mcfg.GrpcKeepAliveInterval,
   701  			Timeout: mcfg.GrpcKeepAliveTimeout,
   702  		}))
   703  	}
   704  	m.ClientMaxCallSendMsgSize = mcfg.ClientMaxCallSendMsgSize
   705  	m.ClientMaxCallRecvMsgSize = mcfg.ClientMaxCallRecvMsgSize
   706  	m.UseIP = mcfg.UseIP
   707  	m.UseBridge = mcfg.UseBridge
   708  	m.UseTCP = mcfg.UseTCP
   709  	m.EnableLeaseCheckpoint = mcfg.EnableLeaseCheckpoint
   710  	m.LeaseCheckpointInterval = mcfg.LeaseCheckpointInterval
   711  	m.LeaseCheckpointPersist = mcfg.LeaseCheckpointPersist
   712  
   713  	m.WatchProgressNotifyInterval = mcfg.WatchProgressNotifyInterval
   714  
   715  	m.InitialCorruptCheck = true
   716  	if mcfg.CorruptCheckTime > time.Duration(0) {
   717  		m.CorruptCheckTime = mcfg.CorruptCheckTime
   718  	}
   719  	m.WarningApplyDuration = embed.DefaultWarningApplyDuration
   720  	m.WarningUnaryRequestDuration = embed.DefaultWarningUnaryRequestDuration
   721  	m.ExperimentalMaxLearners = membership.DefaultMaxLearners
   722  	if mcfg.ExperimentalMaxLearners != 0 {
   723  		m.ExperimentalMaxLearners = mcfg.ExperimentalMaxLearners
   724  	}
   725  	m.V2Deprecation = config.V2_DEPR_DEFAULT
   726  	m.GrpcServerRecorder = &grpc_testing.GrpcRecorder{}
   727  	m.Logger = memberLogger(t, mcfg.Name)
   728  	m.StrictReconfigCheck = !mcfg.DisableStrictReconfigCheck
   729  	if err := m.listenGRPC(); err != nil {
   730  		t.Fatalf("listenGRPC FAILED: %v", err)
   731  	}
   732  	t.Cleanup(func() {
   733  		// if we didn't clean up the logger, a subsequent test
   734  		// might reuse this (t).
   735  		raft.ResetDefaultLogger()
   736  	})
   737  	return m
   738  }
   739  
   740  func memberLogger(t testutil.TB, name string) *zap.Logger {
   741  	level := zapcore.InfoLevel
   742  	if os.Getenv("CLUSTER_DEBUG") != "" {
   743  		level = zapcore.DebugLevel
   744  	}
   745  
   746  	options := zaptest.WrapOptions(zap.Fields(zap.String("member", name)))
   747  	return zaptest.NewLogger(t, zaptest.Level(level), options).Named(name)
   748  }
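
        // For example (hypothetical invocation), exporting CLUSTER_DEBUG turns member
        // logging up to debug level:
        //
        //	CLUSTER_DEBUG=1 go test ./framework/integration/...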
   749  
   750  // listenGRPC creates the member's grpc listener, using a unix domain socket by default or TCP when UseTCP is set
   751  func (m *Member) listenGRPC() error {
   752  	// prefix with localhost so cert has right domain
   753  	network, host, port := m.grpcAddr()
   754  	grpcAddr := host + ":" + port
   755  	wd, err := os.Getwd()
   756  	if err != nil {
   757  		return err
   758  	}
   759  	m.Logger.Info("LISTEN GRPC", zap.String("grpcAddr", grpcAddr), zap.String("m.Name", m.Name), zap.String("workdir", wd))
   760  	grpcListener, err := net.Listen(network, grpcAddr)
   761  	if err != nil {
   762  		return fmt.Errorf("listen failed on grpc socket %s (%v)", grpcAddr, err)
   763  	}
   764  	m.GrpcURL = fmt.Sprintf("%s://%s", m.clientScheme(), grpcAddr)
   765  	if m.UseBridge {
   766  		_, err = m.addBridge()
   767  		if err != nil {
   768  			grpcListener.Close()
   769  			return err
   770  		}
   771  	}
   772  	m.GrpcListener = grpcListener
   773  	return nil
   774  }
   775  
   776  func (m *Member) clientScheme() string {
   777  	switch {
   778  	case m.UseTCP && m.ClientTLSInfo != nil:
   779  		return "https"
   780  	case m.UseTCP && m.ClientTLSInfo == nil:
   781  		return "http"
   782  	case !m.UseTCP && m.ClientTLSInfo != nil:
   783  		return "unixs"
   784  	case !m.UseTCP && m.ClientTLSInfo == nil:
   785  		return "unix"
   786  	}
   787  	m.Logger.Panic("Failed to determine client scheme")
   788  	return ""
   789  }
   790  
   791  func (m *Member) addBridge() (*bridge, error) {
   792  	network, host, port := m.grpcAddr()
   793  	grpcAddr := host + ":" + port
   794  	bridgeAddr := grpcAddr + "0"
   795  	m.Logger.Info("LISTEN BRIDGE", zap.String("grpc-address", bridgeAddr), zap.String("member", m.Name))
   796  	bridgeListener, err := transport.NewUnixListener(bridgeAddr)
   797  	if err != nil {
   798  		return nil, fmt.Errorf("listen failed on bridge socket %s (%v)", bridgeAddr, err)
   799  	}
   800  	m.GrpcBridge, err = newBridge(dialer{network: network, addr: grpcAddr}, bridgeListener)
   801  	if err != nil {
   802  		bridgeListener.Close()
   803  		return nil, err
   804  	}
   805  	m.GrpcURL = m.clientScheme() + "://" + bridgeAddr
   806  	return m.GrpcBridge, nil
   807  }
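
        // The bridge listens on the member's gRPC address with a trailing "0" appended
        // and proxies to the real address: for member m0 on a unix socket, clients dial
        // "localhost:m00" while the server keeps listening on "localhost:m0".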
   808  
   809  func (m *Member) Bridge() *bridge {
   810  	if !m.UseBridge {
   811  		m.Logger.Panic("Bridge not available. Please configure UseBridge before creating the Cluster.")
   812  	}
   813  	return m.GrpcBridge
   814  }
   815  
   816  func (m *Member) grpcAddr() (network, host, port string) {
   817  	// prefix with localhost so cert has right domain
   818  	host = "localhost"
   819  	if m.UseIP { // for IP-only TLS certs
   820  		host = "127.0.0.1"
   821  	}
   822  	network = "unix"
   823  	if m.UseTCP {
   824  		network = "tcp"
   825  	}
   826  	port = m.Name
   827  	if m.UseTCP {
   828  		port = fmt.Sprintf("%d", GrpcPortNumber(m.UniqNumber, m.MemberNumber))
   829  	}
   830  	return network, host, port
   831  }
   832  
   833  func GrpcPortNumber(uniqNumber, memberNumber int) int {
   834  	return BaseGRPCPort + uniqNumber*10 + memberNumber
   835  }
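
        // For example, uniqNumber 3 and memberNumber 1 map to port 30031
        // (30000 + 3*10 + 1).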
   836  
   837  type dialer struct {
   838  	network string
   839  	addr    string
   840  }
   841  
   842  func (d dialer) Dial() (net.Conn, error) {
   843  	return net.Dial(d.network, d.addr)
   844  }
   845  
   846  func (m *Member) ElectionTimeout() time.Duration {
   847  	return time.Duration(m.Server.Cfg.ElectionTicks*int(m.Server.Cfg.TickMs)) * time.Millisecond
   848  }
   849  
   850  func (m *Member) ID() types.ID { return m.Server.MemberId() }
   851  
   852  // NewClientV3 creates a new grpc client connection to the member
   853  func NewClientV3(m *Member) (*clientv3.Client, error) {
   854  	if m.GrpcURL == "" {
   855  		return nil, fmt.Errorf("member not configured for grpc")
   856  	}
   857  
   858  	cfg := clientv3.Config{
   859  		Endpoints:          []string{m.GrpcURL},
   860  		DialTimeout:        5 * time.Second,
   861  		DialOptions:        []grpc.DialOption{grpc.WithBlock()},
   862  		MaxCallSendMsgSize: m.ClientMaxCallSendMsgSize,
   863  		MaxCallRecvMsgSize: m.ClientMaxCallRecvMsgSize,
   864  		Logger:             m.Logger.Named("client"),
   865  	}
   866  
   867  	if m.ClientTLSInfo != nil {
   868  		tls, err := m.ClientTLSInfo.ClientConfig()
   869  		if err != nil {
   870  			return nil, err
   871  		}
   872  		cfg.TLS = tls
   873  	}
   874  	if m.DialOptions != nil {
   875  		cfg.DialOptions = append(cfg.DialOptions, m.DialOptions...)
   876  	}
   877  	return newClientV3(cfg)
   878  }
   879  
   880  // Clone returns a member with the same server configuration. The returned
   881  // member will not have PeerListeners and ClientListeners set.
   882  func (m *Member) Clone(t testutil.TB) *Member {
   883  	mm := &Member{}
   884  	mm.ServerConfig = m.ServerConfig
   885  
   886  	var err error
   887  	clientURLStrs := m.ClientURLs.StringSlice()
   888  	mm.ClientURLs, err = types.NewURLs(clientURLStrs)
   889  	if err != nil {
   890  		// this should never fail
   891  		panic(err)
   892  	}
   893  	peerURLStrs := m.PeerURLs.StringSlice()
   894  	mm.PeerURLs, err = types.NewURLs(peerURLStrs)
   895  	if err != nil {
   896  		// this should never fail
   897  		panic(err)
   898  	}
   899  	clusterStr := m.InitialPeerURLsMap.String()
   900  	mm.InitialPeerURLsMap, err = types.NewURLsMap(clusterStr)
   901  	if err != nil {
   902  		// this should never fail
   903  		panic(err)
   904  	}
   905  	mm.InitialClusterToken = m.InitialClusterToken
   906  	mm.ElectionTicks = m.ElectionTicks
   907  	mm.PeerTLSInfo = m.PeerTLSInfo
   908  	mm.ClientTLSInfo = m.ClientTLSInfo
   909  	mm.Logger = memberLogger(t, mm.Name+"c")
   910  	return mm
   911  }
   912  
   913  // Launch starts a member based on ServerConfig, PeerListeners
   914  // and ClientListeners.
   915  func (m *Member) Launch() error {
   916  	m.Logger.Info(
   917  		"launching a member",
   918  		zap.String("name", m.Name),
   919  		zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()),
   920  		zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()),
   921  		zap.String("grpc-url", m.GrpcURL),
   922  	)
   923  	var err error
   924  	if m.Server, err = etcdserver.NewServer(m.ServerConfig); err != nil {
   925  		return fmt.Errorf("failed to initialize the etcd server: %v", err)
   926  	}
   927  	m.Server.SyncTicker = time.NewTicker(500 * time.Millisecond)
   928  	m.Server.Start()
   929  
   930  	var peerTLScfg *tls.Config
   931  	if m.PeerTLSInfo != nil && !m.PeerTLSInfo.Empty() {
   932  		if peerTLScfg, err = m.PeerTLSInfo.ServerConfig(); err != nil {
   933  			return err
   934  		}
   935  	}
   936  
   937  	if m.GrpcListener != nil {
   938  		var (
   939  			tlscfg *tls.Config
   940  		)
   941  		if m.ClientTLSInfo != nil && !m.ClientTLSInfo.Empty() {
   942  			tlscfg, err = m.ClientTLSInfo.ServerConfig()
   943  			if err != nil {
   944  				return err
   945  			}
   946  		}
   947  		m.GrpcServer = v3rpc.Server(m.Server, tlscfg, m.GrpcServerRecorder.UnaryInterceptor(), m.GrpcServerOpts...)
   948  		m.ServerClient = v3client.New(m.Server)
   949  		lockpb.RegisterLockServer(m.GrpcServer, v3lock.NewLockServer(m.ServerClient))
   950  		epb.RegisterElectionServer(m.GrpcServer, v3election.NewElectionServer(m.ServerClient))
   951  		go m.GrpcServer.Serve(m.GrpcListener)
   952  	}
   953  
   954  	m.RaftHandler = &testutil.PauseableHandler{Next: etcdhttp.NewPeerHandler(m.Logger, m.Server)}
   955  
   956  	h := (http.Handler)(m.RaftHandler)
   957  	if m.GrpcListener != nil {
   958  		h = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   959  			m.RaftHandler.ServeHTTP(w, r)
   960  		})
   961  	}
   962  
   963  	for _, ln := range m.PeerListeners {
   964  		cm := cmux.New(ln)
   965  		// don't hang on matcher after closing listener
   966  		cm.SetReadTimeout(time.Second)
   967  
   968  		// serve http1/http2 rafthttp/grpc
   969  		ll := cm.Match(cmux.Any())
   970  		if peerTLScfg != nil {
   971  			if ll, err = transport.NewTLSListener(ll, m.PeerTLSInfo); err != nil {
   972  				return err
   973  			}
   974  		}
   975  		hs := &httptest.Server{
   976  			Listener: ll,
   977  			Config: &http.Server{
   978  				Handler:   h,
   979  				TLSConfig: peerTLScfg,
   980  				ErrorLog:  log.New(io.Discard, "net/http", 0),
   981  			},
   982  			TLS: peerTLScfg,
   983  		}
   984  		hs.Start()
   985  
   986  		donec := make(chan struct{})
   987  		go func() {
   988  			defer close(donec)
   989  			cm.Serve()
   990  		}()
   991  		closer := func() {
   992  			ll.Close()
   993  			hs.CloseClientConnections()
   994  			hs.Close()
   995  			<-donec
   996  		}
   997  		m.ServerClosers = append(m.ServerClosers, closer)
   998  	}
   999  	for _, ln := range m.ClientListeners {
  1000  		handler := http.NewServeMux()
  1001  		etcdhttp.HandleDebug(handler)
  1002  		etcdhttp.HandleVersion(handler, m.Server)
  1003  		etcdhttp.HandleMetrics(handler)
  1004  		etcdhttp.HandleHealth(m.Logger, handler, m.Server)
  1005  		hs := &httptest.Server{
  1006  			Listener: ln,
  1007  			Config: &http.Server{
  1008  				Handler:  handler,
  1009  				ErrorLog: log.New(io.Discard, "net/http", 0),
  1010  			},
  1011  		}
  1012  		if m.ClientTLSInfo == nil {
  1013  			hs.Start()
  1014  		} else {
  1015  			info := m.ClientTLSInfo
  1016  			hs.TLS, err = info.ServerConfig()
  1017  			if err != nil {
  1018  				return err
  1019  			}
  1020  
  1021  			// baseConfig is called on initial TLS handshake start.
  1022  			//
  1023  			// Previously,
  1024  			// 1. Server has non-empty (*tls.Config).Certificates on client hello
  1025  			// 2. Server calls (*tls.Config).GetCertificate iff:
  1026  			//    - Server's (*tls.Config).Certificates is not empty, or
  1027  			//    - Client supplies SNI; non-empty (*tls.ClientHelloInfo).ServerName
  1028  			//
  1029  			// When (*tls.Config).Certificates is always populated on initial handshake,
  1030  			// client is expected to provide a valid matching SNI to pass the TLS
  1031  			// verification, thus trigger server (*tls.Config).GetCertificate to reload
  1032  			// TLS assets. However, a cert whose SAN field does not include domain names
  1033  			// but only IP addresses, has empty (*tls.ClientHelloInfo).ServerName, thus
  1034  			// it was never able to trigger TLS reload on initial handshake; the first
  1035  			// certificate object was used and never updated.
  1036  			//
  1037  			// Now, (*tls.Config).Certificates is created empty on initial TLS client
  1038  			// handshake, in order to trigger (*tls.Config).GetCertificate and populate
  1039  			// rest of the certificates on every new TLS connection, even when client
  1040  			// SNI is empty (e.g. cert only includes IPs).
  1041  			//
  1042  			// This introduces another problem with "httptest.Server":
  1043  			// when server initial certificates are empty, certificates
  1044  			// are overwritten by Go's internal test certs, which have
  1045  			// different SAN fields (e.g. example.com). To work around,
  1046  			// re-overwrite (*tls.Config).Certificates before starting
  1047  			// test server.
  1048  			tlsCert, err := tlsutil.NewCert(info.CertFile, info.KeyFile, nil)
  1049  			if err != nil {
  1050  				return err
  1051  			}
  1052  			hs.TLS.Certificates = []tls.Certificate{*tlsCert}
  1053  
  1054  			hs.StartTLS()
  1055  		}
  1056  		closer := func() {
  1057  			ln.Close()
  1058  			hs.CloseClientConnections()
  1059  			hs.Close()
  1060  		}
  1061  		m.ServerClosers = append(m.ServerClosers, closer)
  1062  	}
  1063  	if m.GrpcURL != "" && m.Client == nil {
  1064  		m.Client, err = NewClientV3(m)
  1065  		if err != nil {
  1066  			return err
  1067  		}
  1068  	}
  1069  
  1070  	m.Logger.Info(
  1071  		"launched a member",
  1072  		zap.String("name", m.Name),
  1073  		zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()),
  1074  		zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()),
  1075  		zap.String("grpc-url", m.GrpcURL),
  1076  	)
  1077  	return nil
  1078  }
  1079  
  1080  func (m *Member) RecordedRequests() []grpc_testing.RequestInfo {
  1081  	return m.GrpcServerRecorder.RecordedRequests()
  1082  }
  1083  
  1084  func (m *Member) WaitOK(t testutil.TB) {
  1085  	m.WaitStarted(t)
  1086  	for m.Server.Leader() == 0 {
  1087  		time.Sleep(framecfg.TickDuration)
  1088  	}
  1089  }
  1090  
  1091  func (m *Member) WaitStarted(t testutil.TB) {
  1092  	for {
  1093  		ctx, cancel := context.WithTimeout(context.Background(), RequestTimeout)
  1094  		_, err := m.Client.Get(ctx, "/", clientv3.WithSerializable())
  1095  		if err != nil {
  1096  			time.Sleep(framecfg.TickDuration)
  1097  			continue
  1098  		}
  1099  		cancel()
  1100  		break
  1101  	}
  1102  }
  1103  
  1104  func WaitClientV3(t testutil.TB, kv clientv3.KV) {
  1105  	WaitClientV3WithKey(t, kv, "/")
  1106  }
  1107  
  1108  func WaitClientV3WithKey(t testutil.TB, kv clientv3.KV, key string) {
  1109  	timeout := time.Now().Add(RequestTimeout)
  1110  	var err error
  1111  	for time.Now().Before(timeout) {
  1112  		ctx, cancel := context.WithTimeout(context.Background(), RequestTimeout)
  1113  		_, err = kv.Get(ctx, key)
  1114  		cancel()
  1115  		if err == nil {
  1116  			return
  1117  		}
  1118  		time.Sleep(framecfg.TickDuration)
  1119  	}
  1120  	if err != nil {
  1121  		t.Fatalf("timed out waiting for client: %v", err)
  1122  	}
  1123  }
  1124  
  1125  func (m *Member) URL() string { return m.ClientURLs[0].String() }
  1126  
  1127  func (m *Member) Pause() {
  1128  	m.RaftHandler.Pause()
  1129  	m.Server.PauseSending()
  1130  }
  1131  
  1132  func (m *Member) Resume() {
  1133  	m.RaftHandler.Resume()
  1134  	m.Server.ResumeSending()
  1135  }
  1136  
  1137  // Close stops the member's etcdserver and closes its connections
  1138  func (m *Member) Close() {
  1139  	if m.GrpcBridge != nil {
  1140  		m.GrpcBridge.Close()
  1141  		m.GrpcBridge = nil
  1142  	}
  1143  	if m.ServerClient != nil {
  1144  		m.ServerClient.Close()
  1145  		m.ServerClient = nil
  1146  	}
  1147  	if m.GrpcServer != nil {
  1148  		ch := make(chan struct{})
  1149  		go func() {
  1150  			defer close(ch)
  1151  			// close listeners to stop accepting new connections,
  1152  			// will block on any existing transports
  1153  			m.GrpcServer.GracefulStop()
  1154  		}()
  1155  		// wait until all pending RPCs are finished
  1156  		select {
  1157  		case <-ch:
  1158  		case <-time.After(2 * time.Second):
  1159  			// took too long, manually close open transports
  1160  			// e.g. watch streams
  1161  			m.GrpcServer.Stop()
  1162  			<-ch
  1163  		}
  1164  		m.GrpcServer = nil
  1165  	}
  1166  	if m.Server != nil {
  1167  		m.Server.HardStop()
  1168  	}
  1169  	for _, f := range m.ServerClosers {
  1170  		f()
  1171  	}
  1172  	if !m.Closed {
  1173  		// Avoid verification of the same file multiple times
  1174  		// (that might not exist any longer)
  1175  		verify.MustVerifyIfEnabled(verify.Config{
  1176  			Logger:     m.Logger,
  1177  			DataDir:    m.DataDir,
  1178  			ExactIndex: false,
  1179  		})
  1180  	}
  1181  	m.Closed = true
  1182  }
  1183  
  1184  // Stop stops the member, but the data dir of the member is preserved.
  1185  func (m *Member) Stop(_ testutil.TB) {
  1186  	m.Logger.Info(
  1187  		"stopping a member",
  1188  		zap.String("name", m.Name),
  1189  		zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()),
  1190  		zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()),
  1191  		zap.String("grpc-url", m.GrpcURL),
  1192  	)
  1193  	m.Close()
  1194  	m.ServerClosers = nil
  1195  	m.Logger.Info(
  1196  		"stopped a member",
  1197  		zap.String("name", m.Name),
  1198  		zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()),
  1199  		zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()),
  1200  		zap.String("grpc-url", m.GrpcURL),
  1201  	)
  1202  }
  1203  
  1204  // CheckLeaderTransition waits for a leader transition, returning the new leader ID.
  1205  func CheckLeaderTransition(m *Member, oldLead uint64) uint64 {
  1206  	interval := time.Duration(m.Server.Cfg.TickMs) * time.Millisecond
  1207  	for m.Server.Lead() == 0 || (m.Server.Lead() == oldLead) {
  1208  		time.Sleep(interval)
  1209  	}
  1210  	return m.Server.Lead()
  1211  }
  1212  
  1213  // StopNotify unblocks when a member's stop completes
  1214  func (m *Member) StopNotify() <-chan struct{} {
  1215  	return m.Server.StopNotify()
  1216  }
  1217  
  1218  // Restart starts the member using the preserved data dir.
  1219  func (m *Member) Restart(t testutil.TB) error {
  1220  	m.Logger.Info(
  1221  		"restarting a member",
  1222  		zap.String("name", m.Name),
  1223  		zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()),
  1224  		zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()),
  1225  		zap.String("grpc-url", m.GrpcURL),
  1226  	)
  1227  	newPeerListeners := make([]net.Listener, 0)
  1228  	for _, ln := range m.PeerListeners {
  1229  		newPeerListeners = append(newPeerListeners, NewListenerWithAddr(t, ln.Addr().String()))
  1230  	}
  1231  	m.PeerListeners = newPeerListeners
  1232  	newClientListeners := make([]net.Listener, 0)
  1233  	for _, ln := range m.ClientListeners {
  1234  		newClientListeners = append(newClientListeners, NewListenerWithAddr(t, ln.Addr().String()))
  1235  	}
  1236  	m.ClientListeners = newClientListeners
  1237  
  1238  	if m.GrpcListener != nil {
  1239  		if err := m.listenGRPC(); err != nil {
  1240  			t.Fatal(err)
  1241  		}
  1242  	}
  1243  
  1244  	err := m.Launch()
  1245  	m.Logger.Info(
  1246  		"restarted a member",
  1247  		zap.String("name", m.Name),
  1248  		zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()),
  1249  		zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()),
  1250  		zap.String("grpc-url", m.GrpcURL),
  1251  		zap.Error(err),
  1252  	)
  1253  	return err
  1254  }
  1255  
  1256  // Terminate stops the member and removes the data dir.
  1257  func (m *Member) Terminate(t testutil.TB) {
  1258  	m.Logger.Info(
  1259  		"terminating a member",
  1260  		zap.String("name", m.Name),
  1261  		zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()),
  1262  		zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()),
  1263  		zap.String("grpc-url", m.GrpcURL),
  1264  	)
  1265  	m.Close()
  1266  	if !m.KeepDataDirTerminate {
  1267  		if err := os.RemoveAll(m.ServerConfig.DataDir); err != nil {
  1268  			t.Fatal(err)
  1269  		}
  1270  	}
  1271  	m.Logger.Info(
  1272  		"terminated a member",
  1273  		zap.String("name", m.Name),
  1274  		zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()),
  1275  		zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()),
  1276  		zap.String("grpc-url", m.GrpcURL),
  1277  	)
  1278  }
  1279  
  1280  // Metric gets the metric value for a member
  1281  func (m *Member) Metric(metricName string, expectLabels ...string) (string, error) {
  1282  	cfgtls := transport.TLSInfo{}
  1283  	tr, err := transport.NewTimeoutTransport(cfgtls, time.Second, time.Second, time.Second)
  1284  	if err != nil {
  1285  		return "", err
  1286  	}
  1287  	cli := &http.Client{Transport: tr}
  1288  	resp, err := cli.Get(m.ClientURLs[0].String() + "/metrics")
  1289  	if err != nil {
  1290  		return "", err
  1291  	}
  1292  	defer resp.Body.Close()
  1293  	b, rerr := io.ReadAll(resp.Body)
  1294  	if rerr != nil {
  1295  		return "", rerr
  1296  	}
  1297  	lines := strings.Split(string(b), "\n")
  1298  	for _, l := range lines {
  1299  		if !strings.HasPrefix(l, metricName) {
  1300  			continue
  1301  		}
  1302  		ok := true
  1303  		for _, lv := range expectLabels {
  1304  			if !strings.Contains(l, lv) {
  1305  				ok = false
  1306  				break
  1307  			}
  1308  		}
  1309  		if !ok {
  1310  			continue
  1311  		}
  1312  		return strings.Split(l, " ")[1], nil
  1313  	}
  1314  	return "", nil
  1315  }
  1316  
  1317  // InjectPartition drops connections from m to others, and vice versa.
  1318  func (m *Member) InjectPartition(t testutil.TB, others ...*Member) {
  1319  	for _, other := range others {
  1320  		m.Server.CutPeer(other.Server.MemberId())
  1321  		other.Server.CutPeer(m.Server.MemberId())
  1322  		t.Logf("network partition injected between: %v <-> %v", m.Server.MemberId(), other.Server.MemberId())
  1323  	}
  1324  }
  1325  
  1326  // RecoverPartition recovers connections from m to others, and vice versa.
  1327  func (m *Member) RecoverPartition(t testutil.TB, others ...*Member) {
  1328  	for _, other := range others {
  1329  		m.Server.MendPeer(other.Server.MemberId())
  1330  		other.Server.MendPeer(m.Server.MemberId())
  1331  		t.Logf("network partition recovered between: %v <-> %v", m.Server.MemberId(), other.Server.MemberId())
  1332  	}
  1333  }
  1334  
  1335  func (m *Member) ReadyNotify() <-chan struct{} {
  1336  	return m.Server.ReadyNotify()
  1337  }
  1338  
  1339  type SortableMemberSliceByPeerURLs []*pb.Member
  1340  
  1341  func (p SortableMemberSliceByPeerURLs) Len() int { return len(p) }
  1342  func (p SortableMemberSliceByPeerURLs) Less(i, j int) bool {
  1343  	return p[i].PeerURLs[0] < p[j].PeerURLs[0]
  1344  }
  1345  func (p SortableMemberSliceByPeerURLs) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
  1346  
  1347  // NewCluster returns a launched Cluster with a grpc client connection
  1348  // for each Cluster member.
  1349  func NewCluster(t testutil.TB, cfg *ClusterConfig) *Cluster {
  1350  	t.Helper()
  1351  
  1352  	assertInTestContext(t)
  1353  
  1354  	testutil.SkipTestIfShortMode(t, "Cannot start etcd Cluster in --short tests")
  1355  
  1356  	c := &Cluster{Cfg: cfg}
  1357  	ms := make([]*Member, cfg.Size)
  1358  	for i := 0; i < cfg.Size; i++ {
  1359  		ms[i] = c.mustNewMember(t)
  1360  	}
  1361  	c.Members = ms
  1362  	if err := c.fillClusterForMembers(); err != nil {
  1363  		t.Fatalf("fillClusterForMembers failed: %v", err)
  1364  	}
  1365  	c.Launch(t)
  1366  
  1367  	return c
  1368  }
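
        // Typical lifecycle in a test (hypothetical sketch):
        //
        //	clus := NewCluster(t, &ClusterConfig{Size: 3})
        //	defer clus.Terminate(t)
        //	kv := clus.RandClient()
        //	if _, err := kv.Put(context.Background(), "foo", "bar"); err != nil {
        //		t.Fatal(err)
        //	}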
  1369  
  1370  func (c *Cluster) TakeClient(idx int) {
  1371  	c.mu.Lock()
  1372  	c.Members[idx].Client = nil
  1373  	c.mu.Unlock()
  1374  }
  1375  
  1376  func (c *Cluster) Terminate(t testutil.TB) {
  1377  	if t != nil {
  1378  		t.Logf("========= Cluster termination started =====================")
  1379  	}
  1380  	for _, m := range c.Members {
  1381  		if m.Client != nil {
  1382  			m.Client.Close()
  1383  		}
  1384  	}
  1385  	var wg sync.WaitGroup
  1386  	wg.Add(len(c.Members))
  1387  	for _, m := range c.Members {
  1388  		go func(mm *Member) {
  1389  			defer wg.Done()
  1390  			mm.Terminate(t)
  1391  		}(m)
  1392  	}
  1393  	wg.Wait()
  1394  	if t != nil {
  1395  		t.Logf("========= Cluster termination succeeded ===================")
  1396  	}
  1397  }
  1398  
  1399  func (c *Cluster) RandClient() *clientv3.Client {
  1400  	return c.Members[rand.Intn(len(c.Members))].Client
  1401  }
  1402  
  1403  func (c *Cluster) Client(i int) *clientv3.Client {
  1404  	return c.Members[i].Client
  1405  }
  1406  
  1407  func (c *Cluster) Endpoints() []string {
  1408  	var endpoints []string
  1409  	for _, m := range c.Members {
  1410  		endpoints = append(endpoints, m.GrpcURL)
  1411  	}
  1412  	return endpoints
  1413  }
  1414  
  1415  func (c *Cluster) ClusterClient(t testing.TB, opts ...func(*clientv3.Config)) (client *clientv3.Client, err error) {
  1416  	cfg, err := c.newClientCfg()
  1417  	if err != nil {
  1418  		return nil, err
  1419  	}
  1420  	for _, opt := range opts {
  1421  		opt(cfg)
  1422  	}
  1423  	client, err = newClientV3(*cfg)
  1424  	if err != nil {
  1425  		return nil, err
  1426  	}
  1427  	t.Cleanup(func() {
  1428  		client.Close()
  1429  	})
  1430  	return client, nil
  1431  }
  1432  
  1433  func (c *Cluster) newClientCfg() (*clientv3.Config, error) {
  1434  	cfg := &clientv3.Config{
  1435  		Endpoints:          c.Endpoints(),
  1436  		DialTimeout:        5 * time.Second,
  1437  		DialOptions:        []grpc.DialOption{grpc.WithBlock()},
  1438  		MaxCallSendMsgSize: c.Cfg.ClientMaxCallSendMsgSize,
  1439  		MaxCallRecvMsgSize: c.Cfg.ClientMaxCallRecvMsgSize,
  1440  	}
  1441  	if c.Cfg.ClientTLS != nil {
  1442  		tls, err := c.Cfg.ClientTLS.ClientConfig()
  1443  		if err != nil {
  1444  			return nil, err
  1445  		}
  1446  		cfg.TLS = tls
  1447  	}
  1448  	return cfg, nil
  1449  }
  1450  
  1451  // NewClientV3 creates a new grpc client connection to the member
  1452  func (c *Cluster) NewClientV3(memberIndex int) (*clientv3.Client, error) {
  1453  	return NewClientV3(c.Members[memberIndex])
  1454  }
  1455  
  1456  func makeClients(t testutil.TB, clus *Cluster, clients *[]*clientv3.Client, chooseMemberIndex func() int) func() *clientv3.Client {
  1457  	var mu sync.Mutex
  1458  	*clients = nil
  1459  	return func() *clientv3.Client {
  1460  		cli, err := clus.NewClientV3(chooseMemberIndex())
  1461  		if err != nil {
  1462  			t.Fatalf("cannot create client: %v", err)
  1463  		}
  1464  		mu.Lock()
  1465  		*clients = append(*clients, cli)
  1466  		mu.Unlock()
  1467  		return cli
  1468  	}
  1469  }
  1470  
  1471  // MakeSingleNodeClients creates a factory of clients that all connect to member 0.
  1472  // All the created clients are put on the 'clients' list. The factory is thread-safe.
  1473  func MakeSingleNodeClients(t testutil.TB, clus *Cluster, clients *[]*clientv3.Client) func() *clientv3.Client {
  1474  	return makeClients(t, clus, clients, func() int { return 0 })
  1475  }
  1476  
  1477  // MakeMultiNodeClients creates a factory of clients that all connect to random members.
  1478  // All the created clients are put on the 'clients' list. The factory is thread-safe.
  1479  func MakeMultiNodeClients(t testutil.TB, clus *Cluster, clients *[]*clientv3.Client) func() *clientv3.Client {
  1480  	return makeClients(t, clus, clients, func() int { return rand.Intn(len(clus.Members)) })
  1481  }
  1482  
  1483  // CloseClients closes all the clients from the 'clients' list.
  1484  func CloseClients(t testutil.TB, clients []*clientv3.Client) {
  1485  	for _, cli := range clients {
  1486  		if err := cli.Close(); err != nil {
  1487  			t.Fatal(err)
  1488  		}
  1489  	}
  1490  }
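
        // Hypothetical usage of the client factories together with CloseClients:
        //
        //	var clients []*clientv3.Client
        //	newClient := MakeMultiNodeClients(t, clus, &clients)
        //	cli := newClient() // safe to call from multiple goroutines
        //	_ = cli
        //	CloseClients(t, clients)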
  1491  
  1492  type GrpcAPI struct {
  1493  	// Cluster is the Cluster API for the client's connection.
  1494  	Cluster pb.ClusterClient
  1495  	// KV is the keyvalue API for the client's connection.
  1496  	KV pb.KVClient
  1497  	// Lease is the lease API for the client's connection.
  1498  	Lease pb.LeaseClient
  1499  	// Watch is the watch API for the client's connection.
  1500  	Watch pb.WatchClient
  1501  	// Maintenance is the maintenance API for the client's connection.
  1502  	Maintenance pb.MaintenanceClient
  1503  	// Auth is the authentication API for the client's connection.
  1504  	Auth pb.AuthClient
  1505  	// Lock is the lock API for the client's connection.
  1506  	Lock lockpb.LockClient
  1507  	// Election is the election API for the client's connection.
  1508  	Election epb.ElectionClient
  1509  }
  1510  
  1511  // GetLearnerMembers returns the list of learner members in the Cluster using the MemberList API.
  1512  func (c *Cluster) GetLearnerMembers() ([]*pb.Member, error) {
  1513  	cli := c.Client(0)
  1514  	resp, err := cli.MemberList(context.Background())
  1515  	if err != nil {
  1516  		return nil, fmt.Errorf("failed to list members: %v", err)
  1517  	}
  1518  	var learners []*pb.Member
  1519  	for _, m := range resp.Members {
  1520  		if m.IsLearner {
  1521  			learners = append(learners, m)
  1522  		}
  1523  	}
  1524  	return learners, nil
  1525  }
  1526  
  1527  // AddAndLaunchLearnerMember creates a learner member, adds it to the Cluster
  1528  // via the v3 MemberAdd API, and then launches the new member.
  1529  func (c *Cluster) AddAndLaunchLearnerMember(t testutil.TB) {
  1530  	m := c.mustNewMember(t)
  1531  	m.IsLearner = true
  1532  
  1533  	scheme := SchemeFromTLSInfo(c.Cfg.PeerTLS)
  1534  	peerURLs := []string{scheme + "://" + m.PeerListeners[0].Addr().String()}
  1535  
  1536  	cli := c.Client(0)
  1537  	_, err := cli.MemberAddAsLearner(context.Background(), peerURLs)
  1538  	if err != nil {
  1539  		t.Fatalf("failed to add learner member: %v", err)
  1540  	}
  1541  
  1542  	m.InitialPeerURLsMap = types.URLsMap{}
  1543  	for _, mm := range c.Members {
  1544  		m.InitialPeerURLsMap[mm.Name] = mm.PeerURLs
  1545  	}
  1546  	m.InitialPeerURLsMap[m.Name] = m.PeerURLs
  1547  	m.NewCluster = false
  1548  
  1549  	if err := m.Launch(); err != nil {
  1550  		t.Fatal(err)
  1551  	}
  1552  
  1553  	c.Members = append(c.Members, m)
  1554  
  1555  	c.waitMembersMatch(t)
  1556  }
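
        // Hypothetical sketch: add a learner and confirm it is visible via GetLearnerMembers.
        //
        //	clus.AddAndLaunchLearnerMember(t)
        //	learners, err := clus.GetLearnerMembers()
        //	if err != nil {
        //		t.Fatal(err)
        //	}
        //	// expect exactly one learner at this point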
  1557  
  1558  // getMembers returns a list of members in the Cluster, in the format of etcdserverpb.Member
  1559  func (c *Cluster) getMembers() []*pb.Member {
  1560  	var mems []*pb.Member
  1561  	for _, m := range c.Members {
  1562  		mem := &pb.Member{
  1563  			Name:       m.Name,
  1564  			PeerURLs:   m.PeerURLs.StringSlice(),
  1565  			ClientURLs: m.ClientURLs.StringSlice(),
  1566  			IsLearner:  m.IsLearner,
  1567  		}
  1568  		mems = append(mems, mem)
  1569  	}
  1570  	return mems
  1571  }
  1572  
  1573  // waitMembersMatch waits until v3rpc MemberList returns the 'same' members info as the
  1574  // local 'c.Members', which is the local record of members in the testing Cluster, with
  1575  // the exception that the local record c.Members does not have info on Member.ID, which
  1576  // is generated when the member is added to the Cluster.
  1577  //
  1578  // Note:
  1579  // A successful match means the Member.clientURLs are matched. This means member has already
  1580  // A successful match means the Member.clientURLs are matched. This means the member has already
  1581  // finished publishing its server attributes to the Cluster. Publishing attributes is a Cluster-wide
  1582  // would have already been applied.
  1583  //
  1584  // If a new member was added to an existing Cluster, at this point, it has finished publishing
  1585  // its own server attributes to the Cluster. Therefore, by the same argument, it has already
  1586  // applied the raft log entries (especially those of type raftpb.ConfChangeType). At this point,
  1587  // the new member has the correct view of the Cluster configuration.
  1588  //
  1589  // Special note on learner member:
  1590  // A learner member is only added to a Cluster via the v3rpc MemberAdd API (as of v3.4). When starting
  1591  // the learner member, its initial view of the Cluster created by the peerURLs map does not have info
  1592  // on whether or not the new member itself is a learner. But at this point, a successful match does
  1593  // indicate that the new learner member has applied the raftpb.ConfChangeAddLearnerNode entry
  1594  // which was used to add the learner itself to the Cluster, and therefore it has the correct info
  1595  // on learner.
  1596  func (c *Cluster) waitMembersMatch(t testutil.TB) {
  1597  	wMembers := c.getMembers()
  1598  	sort.Sort(SortableProtoMemberSliceByPeerURLs(wMembers))
  1599  	cli := c.Client(0)
  1600  	for {
  1601  		resp, err := cli.MemberList(context.Background())
  1602  		if err != nil {
  1603  			t.Fatalf("failed to list members: %v", err)
  1604  		}
  1605  
  1606  		if len(resp.Members) != len(wMembers) {
  1607  			continue
  1608  		}
  1609  		sort.Sort(SortableProtoMemberSliceByPeerURLs(resp.Members))
  1610  		for _, m := range resp.Members {
  1611  			m.ID = 0
  1612  		}
  1613  		if reflect.DeepEqual(resp.Members, wMembers) {
  1614  			return
  1615  		}
  1616  
  1617  		time.Sleep(framecfg.TickDuration)
  1618  	}
  1619  }
  1620  
  1621  type SortableProtoMemberSliceByPeerURLs []*pb.Member
  1622  
  1623  func (p SortableProtoMemberSliceByPeerURLs) Len() int { return len(p) }
  1624  func (p SortableProtoMemberSliceByPeerURLs) Less(i, j int) bool {
  1625  	return p[i].PeerURLs[0] < p[j].PeerURLs[0]
  1626  }
  1627  func (p SortableProtoMemberSliceByPeerURLs) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
  1628  
  1629  // MustNewMember creates a new member instance based on the response of the V3 MemberAdd API.
  1630  func (c *Cluster) MustNewMember(t testutil.TB, resp *clientv3.MemberAddResponse) *Member {
  1631  	m := c.mustNewMember(t)
  1632  	m.IsLearner = resp.Member.IsLearner
  1633  	m.NewCluster = false
  1634  
  1635  	m.InitialPeerURLsMap = types.URLsMap{}
  1636  	for _, mm := range c.Members {
  1637  		m.InitialPeerURLsMap[mm.Name] = mm.PeerURLs
  1638  	}
  1639  	m.InitialPeerURLsMap[m.Name] = types.MustNewURLs(resp.Member.PeerURLs)
  1640  	c.Members = append(c.Members, m)
  1641  	return m
  1642  }