// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package integration

import (
	"context"
	"crypto/tls"
	"errors"
	"fmt"
	"io"
	"log"
	"math/rand"
	"net"
	"net/http"
	"net/http/httptest"
	"os"
	"reflect"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	pb "github.com/lfch/etcd-io/api/v3/etcdserverpb"
	"github.com/lfch/etcd-io/client/pkg/v3/testutil"
	"github.com/lfch/etcd-io/client/pkg/v3/tlsutil"
	"github.com/lfch/etcd-io/client/pkg/v3/transport"
	"github.com/lfch/etcd-io/client/pkg/v3/types"
	clientv3 "github.com/lfch/etcd-io/client/v3"
	"github.com/lfch/etcd-io/pkg/v3/grpc_testing"
	"github.com/lfch/etcd-io/raft/v3"
	"github.com/lfch/etcd-io/server/v3/config"
	"github.com/lfch/etcd-io/server/v3/embed"
	"github.com/lfch/etcd-io/server/v3/etcdserver"
	"github.com/lfch/etcd-io/server/v3/etcdserver/api/etcdhttp"
	"github.com/lfch/etcd-io/server/v3/etcdserver/api/membership"
	"github.com/lfch/etcd-io/server/v3/etcdserver/api/rafthttp"
	"github.com/lfch/etcd-io/server/v3/etcdserver/api/v3client"
	"github.com/lfch/etcd-io/server/v3/etcdserver/api/v3election"
	epb "github.com/lfch/etcd-io/server/v3/etcdserver/api/v3election/v3electionpb"
	"github.com/lfch/etcd-io/server/v3/etcdserver/api/v3lock"
	lockpb "github.com/lfch/etcd-io/server/v3/etcdserver/api/v3lock/v3lockpb"
	"github.com/lfch/etcd-io/server/v3/etcdserver/api/v3rpc"
	"github.com/lfch/etcd-io/server/v3/verify"
	framecfg "github.com/lfch/etcd-io/tests/v3/framework/config"
	"go.uber.org/zap/zapcore"
	"go.uber.org/zap/zaptest"

	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"
	"github.com/soheilhy/cmux"
	"go.uber.org/zap"
	"golang.org/x/crypto/bcrypt"
	"google.golang.org/grpc"
	"google.golang.org/grpc/keepalive"
)

const (
	// RequestWaitTimeout is the time duration to wait for a request to go through or detect leader loss.
	RequestWaitTimeout = 5 * time.Second
	RequestTimeout     = 20 * time.Second

	ClusterName  = "etcd"
	BasePort     = 21000
	URLScheme    = "unix"
	URLSchemeTLS = "unixs"
	BaseGRPCPort = 30000
)

var (
	// ElectionTicks is the number of raft ticks between elections for test members.
	ElectionTicks = 10

	// LocalListenCount integration test uses unique ports, counting up, to listen for each
	// member, ensuring restarted members can listen on the same port again.
	LocalListenCount = int32(0)

	TestTLSInfo = transport.TLSInfo{
		KeyFile:        MustAbsPath("../fixtures/server.key.insecure"),
		CertFile:       MustAbsPath("../fixtures/server.crt"),
		TrustedCAFile:  MustAbsPath("../fixtures/ca.crt"),
		ClientCertAuth: true,
	}

	TestTLSInfoWithSpecificUsage = transport.TLSInfo{
		KeyFile:        MustAbsPath("../fixtures/server-serverusage.key.insecure"),
		CertFile:       MustAbsPath("../fixtures/server-serverusage.crt"),
		ClientKeyFile:  MustAbsPath("../fixtures/client-clientusage.key.insecure"),
		ClientCertFile: MustAbsPath("../fixtures/client-clientusage.crt"),
		TrustedCAFile:  MustAbsPath("../fixtures/ca.crt"),
		ClientCertAuth: true,
	}

	TestTLSInfoIP = transport.TLSInfo{
		KeyFile:        MustAbsPath("../fixtures/server-ip.key.insecure"),
		CertFile:       MustAbsPath("../fixtures/server-ip.crt"),
		TrustedCAFile:  MustAbsPath("../fixtures/ca.crt"),
		ClientCertAuth: true,
	}

	TestTLSInfoExpired = transport.TLSInfo{
		KeyFile:        MustAbsPath("./fixtures-expired/server.key.insecure"),
		CertFile:       MustAbsPath("./fixtures-expired/server.crt"),
		TrustedCAFile:  MustAbsPath("./fixtures-expired/ca.crt"),
		ClientCertAuth: true,
	}

	TestTLSInfoExpiredIP = transport.TLSInfo{
		KeyFile:        MustAbsPath("./fixtures-expired/server-ip.key.insecure"),
		CertFile:       MustAbsPath("./fixtures-expired/server-ip.crt"),
		TrustedCAFile:  MustAbsPath("./fixtures-expired/ca.crt"),
		ClientCertAuth: true,
	}

	DefaultTokenJWT = fmt.Sprintf("jwt,pub-key=%s,priv-key=%s,sign-method=RS256,ttl=1s",
		MustAbsPath("../fixtures/server.crt"), MustAbsPath("../fixtures/server.key.insecure"))

	// UniqueNumber is used to generate unique port numbers
	// Should only be accessed via atomic package methods.
	UniqueNumber int32
)

// ClusterConfig carries the knobs used to build every member of a test Cluster.
type ClusterConfig struct {
	Size      int
	PeerTLS   *transport.TLSInfo
	ClientTLS *transport.TLSInfo

	DiscoveryURL string

	AuthToken    string
	AuthTokenTTL uint

	QuotaBackendBytes int64

	MaxTxnOps              uint
	MaxRequestBytes        uint
	SnapshotCount          uint64
	SnapshotCatchUpEntries uint64

	GRPCKeepAliveMinTime  time.Duration
	GRPCKeepAliveInterval time.Duration
	GRPCKeepAliveTimeout  time.Duration

	ClientMaxCallSendMsgSize int
	ClientMaxCallRecvMsgSize int

	// UseIP is true to use only IP for gRPC requests.
	UseIP bool
	// UseBridge adds bridge between client and grpc server. Should be used in tests that
	// want to manipulate connection or require connection not breaking despite server stop/restart.
	UseBridge bool
	// UseTCP configures server listen on tcp socket. If disabled unix socket is used.
	UseTCP bool

	EnableLeaseCheckpoint   bool
	LeaseCheckpointInterval time.Duration
	LeaseCheckpointPersist  bool

	WatchProgressNotifyInterval time.Duration
	ExperimentalMaxLearners     int
	DisableStrictReconfigCheck  bool
	CorruptCheckTime            time.Duration
}

// Cluster is a set of launched etcd Members sharing one ClusterConfig.
type Cluster struct {
	Cfg           *ClusterConfig
	Members       []*Member
	LastMemberNum int

	mu sync.Mutex
}

// SchemeFromTLSInfo maps a (possibly nil) TLS config to the matching unix URL scheme.
func SchemeFromTLSInfo(tls *transport.TLSInfo) string {
	if tls == nil {
		return URLScheme
	}
	return URLSchemeTLS
}

// fillClusterForMembers populates every member's InitialPeerURLsMap from the
// peer listeners of all current members. It is a no-op when a discovery URL
// is configured, since the cluster will be discovered instead.
func (c *Cluster) fillClusterForMembers() error {
	if c.Cfg.DiscoveryURL != "" {
		// Cluster will be discovered
		return nil
	}

	addrs := make([]string, 0)
	for _, m := range c.Members {
		scheme := SchemeFromTLSInfo(m.PeerTLSInfo)
		for _, l := range m.PeerListeners {
			addrs = append(addrs, fmt.Sprintf("%s=%s://%s", m.Name, scheme, l.Addr().String()))
		}
	}
	clusterStr := strings.Join(addrs, ",")
	var err error
	for _, m := range c.Members {
		m.InitialPeerURLsMap, err = types.NewURLsMap(clusterStr)
		if err != nil {
			return err
		}
	}
	return nil
}

// Launch starts all members concurrently and blocks until the cluster is
// stable (membership agreed and cluster version set), failing the test on error.
func (c *Cluster) Launch(t testutil.TB) {
	t.Logf("Launching new cluster...")
	errc := make(chan error)
	for _, m := range c.Members {
		// Members are launched in separate goroutines because if they boot
		// using discovery url, they have to wait for others to register to continue.
		go func(m *Member) {
			errc <- m.Launch()
		}(m)
	}
	for range c.Members {
		if err := <-errc; err != nil {
			c.Terminate(t)
			t.Fatalf("error setting up member: %v", err)
		}
	}
	// wait Cluster to be stable to receive future client requests
	c.WaitMembersMatch(t, c.ProtoMembers())
	c.waitVersion()
	for _, m := range c.Members {
		t.Logf(" - %v -> %v (%v)", m.Name, m.ID(), m.GRPCURL())
	}
}

// ProtoMembers returns a list of all active members as client.Members
func (c *Cluster) ProtoMembers() []*pb.Member {
	var ms []*pb.Member
	for _, m := range c.Members {
		pScheme := SchemeFromTLSInfo(m.PeerTLSInfo)
		cScheme := SchemeFromTLSInfo(m.ClientTLSInfo)
		cm := &pb.Member{Name: m.Name}
		for _, ln := range m.PeerListeners {
			cm.PeerURLs = append(cm.PeerURLs, pScheme+"://"+ln.Addr().String())
		}
		for _, ln := range m.ClientListeners {
			cm.ClientURLs = append(cm.ClientURLs, cScheme+"://"+ln.Addr().String())
		}
		ms = append(ms, cm)
	}
	return ms
}

// mustNewMember builds a new (not yet launched) member named "m<N>" from the
// cluster config, incrementing LastMemberNum. Fails the test on any error.
func (c *Cluster) mustNewMember(t testutil.TB) *Member {
	memberNumber := c.LastMemberNum
	c.LastMemberNum++
	m := MustNewMember(t,
		MemberConfig{
			Name:                        fmt.Sprintf("m%v", memberNumber),
			MemberNumber:                memberNumber,
			AuthToken:                   c.Cfg.AuthToken,
			AuthTokenTTL:                c.Cfg.AuthTokenTTL,
			PeerTLS:                     c.Cfg.PeerTLS,
			ClientTLS:                   c.Cfg.ClientTLS,
			QuotaBackendBytes:           c.Cfg.QuotaBackendBytes,
			MaxTxnOps:                   c.Cfg.MaxTxnOps,
			MaxRequestBytes:             c.Cfg.MaxRequestBytes,
			SnapshotCount:               c.Cfg.SnapshotCount,
			SnapshotCatchUpEntries:      c.Cfg.SnapshotCatchUpEntries,
			GrpcKeepAliveMinTime:        c.Cfg.GRPCKeepAliveMinTime,
			GrpcKeepAliveInterval:       c.Cfg.GRPCKeepAliveInterval,
			GrpcKeepAliveTimeout:        c.Cfg.GRPCKeepAliveTimeout,
			ClientMaxCallSendMsgSize:    c.Cfg.ClientMaxCallSendMsgSize,
			ClientMaxCallRecvMsgSize:    c.Cfg.ClientMaxCallRecvMsgSize,
			UseIP:                       c.Cfg.UseIP,
			UseBridge:                   c.Cfg.UseBridge,
			UseTCP:                      c.Cfg.UseTCP,
			EnableLeaseCheckpoint:       c.Cfg.EnableLeaseCheckpoint,
			LeaseCheckpointInterval:     c.Cfg.LeaseCheckpointInterval,
			LeaseCheckpointPersist:      c.Cfg.LeaseCheckpointPersist,
			WatchProgressNotifyInterval: c.Cfg.WatchProgressNotifyInterval,
			ExperimentalMaxLearners:     c.Cfg.ExperimentalMaxLearners,
			DisableStrictReconfigCheck:  c.Cfg.DisableStrictReconfigCheck,
			CorruptCheckTime:            c.Cfg.CorruptCheckTime,
		})
	m.DiscoveryURL = c.Cfg.DiscoveryURL
	return m
}

// addMember return PeerURLs of the added member.
func (c *Cluster) addMember(t testutil.TB) types.URLs {
	m := c.mustNewMember(t)

	scheme := SchemeFromTLSInfo(c.Cfg.PeerTLS)

	// send add request to the Cluster; try each existing member until one succeeds
	var err error
	for i := 0; i < len(c.Members); i++ {
		peerURL := scheme + "://" + m.PeerListeners[0].Addr().String()
		if err = c.AddMemberByURL(t, c.Members[i].Client, peerURL); err == nil {
			break
		}
	}
	if err != nil {
		t.Fatalf("add member failed on all members error: %v", err)
	}

	m.InitialPeerURLsMap = types.URLsMap{}
	for _, mm := range c.Members {
		m.InitialPeerURLsMap[mm.Name] = mm.PeerURLs
	}
	m.InitialPeerURLsMap[m.Name] = m.PeerURLs
	m.NewCluster = false
	if err := m.Launch(); err != nil {
		t.Fatal(err)
	}
	c.Members = append(c.Members, m)
	// wait Cluster to be stable to receive future client requests
	c.WaitMembersMatch(t, c.ProtoMembers())
	return m.PeerURLs
}

// AddMemberByURL issues MemberAdd through the given client and waits until the
// add-member entry is applied on every running member.
func (c *Cluster) AddMemberByURL(t testutil.TB, cc *clientv3.Client, peerURL string) error {
	ctx, cancel := context.WithTimeout(context.Background(), RequestTimeout)
	_, err := cc.MemberAdd(ctx, []string{peerURL})
	cancel()
	if err != nil {
		return err
	}

	// wait for the add node entry applied in the Cluster
	members := append(c.ProtoMembers(), &pb.Member{PeerURLs: []string{peerURL}, ClientURLs: []string{}})
	c.WaitMembersMatch(t, members)
	return nil
}

// AddMember return PeerURLs of the added member.
func (c *Cluster) AddMember(t testutil.TB) types.URLs {
	return c.addMember(t)
}

// RemoveMember removes the member with the given raft ID via the provided
// client, terminates its local process, and waits for the membership to settle.
func (c *Cluster) RemoveMember(t testutil.TB, cc *clientv3.Client, id uint64) error {
	// send remove request to the Cluster

	ctx, cancel := context.WithTimeout(context.Background(), RequestTimeout)
	_, err := cc.MemberRemove(ctx, id)
	cancel()
	if err != nil {
		return err
	}
	newMembers := make([]*Member, 0)
	for _, m := range c.Members {
		if uint64(m.Server.MemberId()) != id {
			newMembers = append(newMembers, m)
		} else {
			m.Client.Close()
			select {
			case <-m.Server.StopNotify():
				m.Terminate(t)
			// 1s stop delay + election timeout + 1s disk and network delay + connection write timeout
			// TODO: remove connection write timeout by selecting on http response closeNotifier
			// blocking on https://github.com/golang/go/issues/9524
			case <-time.After(time.Second + time.Duration(ElectionTicks)*framecfg.TickDuration + time.Second + rafthttp.ConnWriteTimeout):
				t.Fatalf("failed to remove member %s in time", m.Server.MemberId())
			}
		}
	}
	c.Members = newMembers
	c.WaitMembersMatch(t, c.ProtoMembers())
	return nil
}

// WaitMembersMatch polls every running member until its MemberList matches
// membs (ignoring IDs/URLs; see isMembersEqual), failing the test if the
// shared RequestTimeout deadline is exceeded.
func (c *Cluster) WaitMembersMatch(t testutil.TB, membs []*pb.Member) {
	ctx, cancel := context.WithTimeout(context.Background(), RequestTimeout)
	defer cancel()
	for _, m := range c.Members {
		cc := ToGRPC(m.Client)
		select {
		case <-m.Server.StopNotify():
			continue
		default:
		}
		for {
			resp, err := cc.Cluster.MemberList(ctx, &pb.MemberListRequest{Linearizable: false})
			if errors.Is(err, context.DeadlineExceeded) {
				t.Fatal(err)
			}
			if err != nil {
				continue
			}
			if isMembersEqual(resp.Members, membs) {
				break
			}
			time.Sleep(framecfg.TickDuration)
		}
	}
}

// WaitLeader returns index of the member in c.Members that is leader
// or fails the test (if not established in 30s).
func (c *Cluster) WaitLeader(t testing.TB) int {
	return c.WaitMembersForLeader(t, c.Members)
}

// WaitMembersForLeader waits until given members agree on the same leader,
// and returns its 'index' in the 'membs' list
func (c *Cluster) WaitMembersForLeader(t testing.TB, membs []*Member) int {
	t.Logf("WaitMembersForLeader")
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	l := 0
	// NOTE(review): if the first call returns -1, this loop busy-spins on
	// ctx.Err() without retrying waitMembersForLeader until the 30s deadline.
	for l = c.waitMembersForLeader(ctx, t, membs); l < 0; {
		if ctx.Err() != nil {
			t.Fatalf("WaitLeader FAILED: %v", ctx.Err())
		}
	}
	t.Logf("WaitMembersForLeader succeeded. Cluster leader index: %v", l)

	// TODO: Consider second pass check as sometimes leadership is lost
	// soon after election:
	//
	// We perform multiple attempts, as some-times just after successful WaitLLeader
	// there is a race and leadership is quickly lost:
	//   - MsgAppResp message with higher term from 2acc3d3b521981 [term: 3] {"member": "m0"}
	//   - 9903a56eaf96afac became follower at term 3 {"member": "m0"}
	//   - 9903a56eaf96afac lost leader 9903a56eaf96afac at term 3 {"member": "m0"}

	return l
}

// waitMembersForLeader waits until given members agree on the same leader,
// and returns its 'index' in the 'membs' list; -1 if no agreed leader was found.
func (c *Cluster) waitMembersForLeader(ctx context.Context, t testing.TB, membs []*Member) int {
	possibleLead := make(map[uint64]bool)
	var lead uint64
	for _, m := range membs {
		possibleLead[uint64(m.Server.MemberId())] = true
	}
	cc, err := c.ClusterClient(t)
	if err != nil {
		t.Fatal(err)
	}
	// ensure leader is up via linearizable get
	for {
		ctx, cancel := context.WithTimeout(ctx, 10*framecfg.TickDuration+time.Second)
		_, err := cc.Get(ctx, "0")
		cancel()
		if err == nil || strings.Contains(err.Error(), "Key not found") {
			break
		}
	}

	for lead == 0 || !possibleLead[lead] {
		lead = 0
		for _, m := range membs {
			select {
			case <-m.Server.StopNotify():
				continue
			default:
			}
			// two members reporting different leaders: reset and retry after a pause
			if lead != 0 && lead != m.Server.Lead() {
				lead = 0
				time.Sleep(10 * framecfg.TickDuration)
				break
			}
			lead = m.Server.Lead()
		}
	}

	for i, m := range membs {
		if uint64(m.Server.MemberId()) == lead {
			t.Logf("waitMembersForLeader found leader. Member: %v lead: %x", i, lead)
			return i
		}
	}

	t.Logf("waitMembersForLeader failed (-1)")
	return -1
}

// WaitNoLeader waits until all cluster members lose their leader.
func (c *Cluster) WaitNoLeader() { c.WaitMembersNoLeader(c.Members) }

// WaitMembersNoLeader waits until given members lose leader.
func (c *Cluster) WaitMembersNoLeader(membs []*Member) {
	noLeader := false
	for !noLeader {
		noLeader = true
		for _, m := range membs {
			select {
			case <-m.Server.StopNotify():
				continue
			default:
			}
			if m.Server.Lead() != 0 {
				noLeader = false
				time.Sleep(10 * framecfg.TickDuration)
				break
			}
		}
	}
}

// waitVersion blocks until every member has a non-nil cluster version.
func (c *Cluster) waitVersion() {
	for _, m := range c.Members {
		for {
			if m.Server.ClusterVersion() != nil {
				break
			}
			time.Sleep(framecfg.TickDuration)
		}
	}
}

// isMembersEqual checks whether two member lists are equal while ignoring the
// ID, PeerURLs and ClientURLs fields. Both slices are sorted in place (by
// peer URLs) before comparison.
func isMembersEqual(membs []*pb.Member, wmembs []*pb.Member) bool {
	sort.Sort(SortableMemberSliceByPeerURLs(membs))
	sort.Sort(SortableMemberSliceByPeerURLs(wmembs))
	return cmp.Equal(membs, wmembs, cmpopts.IgnoreFields(pb.Member{}, "ID", "PeerURLs", "ClientURLs"))
}

// NewLocalListener creates a unix-socket listener whose name is derived from a
// process-wide counter plus the pid, so restarted members can reuse the same name.
func NewLocalListener(t testutil.TB) net.Listener {
	c := atomic.AddInt32(&LocalListenCount, 1)
	// Go 1.8+ allows only numbers in port
	addr := fmt.Sprintf("127.0.0.1:%05d%05d", c+BasePort, os.Getpid())
	return NewListenerWithAddr(t, addr)
}

// NewListenerWithAddr creates a unix-socket listener bound to addr, failing
// the test on error. Note: addr is used as a unix socket name, not a TCP port.
func NewListenerWithAddr(t testutil.TB, addr string) net.Listener {
	t.Logf("Creating listener with addr: %v", addr)
	l, err := transport.NewUnixListener(addr)
	if err != nil {
		t.Fatal(err)
	}
	return l
}

// Member is a single etcd server instance plus the listeners, clients and
// teardown hooks the test framework needs to drive it.
type Member struct {
	config.ServerConfig
	UniqNumber                     int
	MemberNumber                   int
	PeerListeners, ClientListeners []net.Listener
	GrpcListener                   net.Listener
	// PeerTLSInfo enables peer TLS when set
	PeerTLSInfo *transport.TLSInfo
	// ClientTLSInfo enables client TLS when set
	ClientTLSInfo *transport.TLSInfo
	DialOptions   []grpc.DialOption

	RaftHandler   *testutil.PauseableHandler
	Server        *etcdserver.EtcdServer
	ServerClosers []func()

	GrpcServerOpts []grpc.ServerOption
	GrpcServer     *grpc.Server
	GrpcURL        string
	GrpcBridge     *bridge

	// ServerClient is a clientv3 that directly calls the etcdserver.
	ServerClient *clientv3.Client
	// Client is a clientv3 that communicates via socket, either UNIX or TCP.
	Client *clientv3.Client

	KeepDataDirTerminate     bool
	ClientMaxCallSendMsgSize int
	ClientMaxCallRecvMsgSize int
	UseIP                    bool
	UseBridge                bool
	UseTCP                   bool

	IsLearner bool
	Closed    bool

	GrpcServerRecorder *grpc_testing.GrpcRecorder
}

// GRPCURL returns the URL test clients should dial to reach this member.
func (m *Member) GRPCURL() string { return m.GrpcURL }

// MemberConfig is the per-member subset of ClusterConfig consumed by MustNewMember.
type MemberConfig struct {
	Name                        string
	UniqNumber                  int64
	MemberNumber                int
	PeerTLS                     *transport.TLSInfo
	ClientTLS                   *transport.TLSInfo
	AuthToken                   string
	AuthTokenTTL                uint
	QuotaBackendBytes           int64
	MaxTxnOps                   uint
	MaxRequestBytes             uint
	SnapshotCount               uint64
	SnapshotCatchUpEntries      uint64
	GrpcKeepAliveMinTime        time.Duration
	GrpcKeepAliveInterval       time.Duration
	GrpcKeepAliveTimeout        time.Duration
	ClientMaxCallSendMsgSize    int
	ClientMaxCallRecvMsgSize    int
	UseIP                       bool
	UseBridge                   bool
	UseTCP                      bool
	EnableLeaseCheckpoint       bool
	LeaseCheckpointInterval     time.Duration
	LeaseCheckpointPersist      bool
	WatchProgressNotifyInterval time.Duration
	ExperimentalMaxLearners     int
	DisableStrictReconfigCheck  bool
	CorruptCheckTime            time.Duration
}

// MustNewMember return an inited member with the given name. If peerTLS is
// set, it will use https scheme to communicate between peers.
func MustNewMember(t testutil.TB, mcfg MemberConfig) *Member {
	var err error
	m := &Member{
		MemberNumber: mcfg.MemberNumber,
		UniqNumber:   int(atomic.AddInt32(&LocalListenCount, 1)),
	}

	peerScheme := SchemeFromTLSInfo(mcfg.PeerTLS)
	clientScheme := SchemeFromTLSInfo(mcfg.ClientTLS)

	pln := NewLocalListener(t)
	m.PeerListeners = []net.Listener{pln}
	m.PeerURLs, err = types.NewURLs([]string{peerScheme + "://" + pln.Addr().String()})
	if err != nil {
		t.Fatal(err)
	}
	m.PeerTLSInfo = mcfg.PeerTLS

	cln := NewLocalListener(t)
	m.ClientListeners = []net.Listener{cln}
	m.ClientURLs, err = types.NewURLs([]string{clientScheme + "://" + cln.Addr().String()})
	if err != nil {
		t.Fatal(err)
	}
	m.ClientTLSInfo = mcfg.ClientTLS

	m.Name = mcfg.Name

	m.DataDir, err = os.MkdirTemp(t.TempDir(), "etcd")
	if err != nil {
		t.Fatal(err)
	}
	clusterStr := fmt.Sprintf("%s=%s://%s", mcfg.Name, peerScheme, pln.Addr().String())
	m.InitialPeerURLsMap, err = types.NewURLsMap(clusterStr)
	if err != nil {
		t.Fatal(err)
	}
	m.InitialClusterToken = ClusterName
	m.NewCluster = true
	m.BootstrapTimeout = 10 * time.Millisecond
	if m.PeerTLSInfo != nil {
		m.ServerConfig.PeerTLSInfo = *m.PeerTLSInfo
	}
	m.ElectionTicks = ElectionTicks
	m.InitialElectionTickAdvance = true
	m.TickMs = uint(framecfg.TickDuration / time.Millisecond)
	m.QuotaBackendBytes = mcfg.QuotaBackendBytes
	m.MaxTxnOps = mcfg.MaxTxnOps
	if m.MaxTxnOps == 0 {
		m.MaxTxnOps = embed.DefaultMaxTxnOps
	}
	m.MaxRequestBytes = mcfg.MaxRequestBytes
	if m.MaxRequestBytes == 0 {
		m.MaxRequestBytes = embed.DefaultMaxRequestBytes
	}
	m.SnapshotCount = etcdserver.DefaultSnapshotCount
	if mcfg.SnapshotCount != 0 {
		m.SnapshotCount = mcfg.SnapshotCount
	}
	m.SnapshotCatchUpEntries = etcdserver.DefaultSnapshotCatchUpEntries
	if mcfg.SnapshotCatchUpEntries != 0 {
		m.SnapshotCatchUpEntries = mcfg.SnapshotCatchUpEntries
	}

	// for the purpose of integration testing, simple token is enough
	m.AuthToken = "simple"
	if mcfg.AuthToken != "" {
		m.AuthToken = mcfg.AuthToken
	}
	if mcfg.AuthTokenTTL != 0 {
		m.TokenTTL = mcfg.AuthTokenTTL
	}

	m.BcryptCost = uint(bcrypt.MinCost) // use min bcrypt cost to speedy up integration testing

	m.GrpcServerOpts = []grpc.ServerOption{}
	if mcfg.GrpcKeepAliveMinTime > time.Duration(0) {
		m.GrpcServerOpts = append(m.GrpcServerOpts, grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{
			MinTime:             mcfg.GrpcKeepAliveMinTime,
			PermitWithoutStream: false,
		}))
	}
	if mcfg.GrpcKeepAliveInterval > time.Duration(0) &&
		mcfg.GrpcKeepAliveTimeout > time.Duration(0) {
		m.GrpcServerOpts = append(m.GrpcServerOpts, grpc.KeepaliveParams(keepalive.ServerParameters{
			Time:    mcfg.GrpcKeepAliveInterval,
			Timeout: mcfg.GrpcKeepAliveTimeout,
		}))
	}
	m.ClientMaxCallSendMsgSize = mcfg.ClientMaxCallSendMsgSize
	m.ClientMaxCallRecvMsgSize = mcfg.ClientMaxCallRecvMsgSize
	m.UseIP = mcfg.UseIP
	m.UseBridge = mcfg.UseBridge
	m.UseTCP = mcfg.UseTCP
	m.EnableLeaseCheckpoint = mcfg.EnableLeaseCheckpoint
	m.LeaseCheckpointInterval = mcfg.LeaseCheckpointInterval
	m.LeaseCheckpointPersist = mcfg.LeaseCheckpointPersist

	m.WatchProgressNotifyInterval = mcfg.WatchProgressNotifyInterval

	m.InitialCorruptCheck = true
	if mcfg.CorruptCheckTime > time.Duration(0) {
		m.CorruptCheckTime = mcfg.CorruptCheckTime
	}
	m.WarningApplyDuration = embed.DefaultWarningApplyDuration
	m.WarningUnaryRequestDuration = embed.DefaultWarningUnaryRequestDuration
	m.ExperimentalMaxLearners = membership.DefaultMaxLearners
	if mcfg.ExperimentalMaxLearners != 0 {
		m.ExperimentalMaxLearners = mcfg.ExperimentalMaxLearners
	}
	m.V2Deprecation = config.V2_DEPR_DEFAULT
	m.GrpcServerRecorder = &grpc_testing.GrpcRecorder{}
	m.Logger = memberLogger(t, mcfg.Name)
	m.StrictReconfigCheck = !mcfg.DisableStrictReconfigCheck
	if err := m.listenGRPC(); err != nil {
		t.Fatalf("listenGRPC FAILED: %v", err)
	}
	t.Cleanup(func() {
		// if we didn't cleanup the logger, the consecutive test
		// might reuse this (t).
		raft.ResetDefaultLogger()
	})
	return m
}

// memberLogger builds a per-member zaptest logger; CLUSTER_DEBUG env enables debug level.
func memberLogger(t testutil.TB, name string) *zap.Logger {
	level := zapcore.InfoLevel
	if os.Getenv("CLUSTER_DEBUG") != "" {
		level = zapcore.DebugLevel
	}

	options := zaptest.WrapOptions(zap.Fields(zap.String("member", name)))
	return zaptest.NewLogger(t, zaptest.Level(level), options).Named(name)
}

// listenGRPC starts a grpc server over a unix domain socket on the member
func (m *Member) listenGRPC() error {
	// prefix with localhost so cert has right domain
	network, host, port := m.grpcAddr()
	grpcAddr := host + ":" + port
	wd, err := os.Getwd()
	if err != nil {
		return err
	}
	m.Logger.Info("LISTEN GRPC", zap.String("grpcAddr", grpcAddr), zap.String("m.Name", m.Name), zap.String("workdir", wd))
	grpcListener, err := net.Listen(network, grpcAddr)
	if err != nil {
		return fmt.Errorf("listen failed on grpc socket %s (%v)", grpcAddr, err)
	}
	m.GrpcURL = fmt.Sprintf("%s://%s", m.clientScheme(), grpcAddr)
	if m.UseBridge {
		// the bridge rewrites m.GrpcURL to point at itself
		_, err = m.addBridge()
		if err != nil {
			grpcListener.Close()
			return err
		}
	}
	m.GrpcListener = grpcListener
	return nil
}

// clientScheme derives the URL scheme for client connections from the
// TCP-vs-unix and TLS-vs-plaintext configuration of this member.
func (m *Member) clientScheme() string {
	switch {
	case m.UseTCP && m.ClientTLSInfo != nil:
		return "https"
	case m.UseTCP && m.ClientTLSInfo == nil:
		return "http"
	case !m.UseTCP && m.ClientTLSInfo != nil:
		return "unixs"
	case !m.UseTCP && m.ClientTLSInfo == nil:
		return "unix"
	}
	m.Logger.Panic("Failed to determine client schema")
	return ""
}

// addBridge interposes a bridge listener between clients and the grpc server
// (bridge address = grpc address + "0") and points m.GrpcURL at the bridge.
func (m *Member) addBridge() (*bridge, error) {
	network, host, port := m.grpcAddr()
	grpcAddr := host + ":" + port
	bridgeAddr := grpcAddr + "0"
	m.Logger.Info("LISTEN BRIDGE", zap.String("grpc-address", bridgeAddr), zap.String("member", m.Name))
	bridgeListener, err := transport.NewUnixListener(bridgeAddr)
	if err != nil {
		return nil, fmt.Errorf("listen failed on bridge socket %s (%v)", bridgeAddr, err)
	}
	m.GrpcBridge, err = newBridge(dialer{network: network, addr: grpcAddr}, bridgeListener)
	if err != nil {
		bridgeListener.Close()
		return nil, err
	}
	m.GrpcURL = m.clientScheme() + "://" + bridgeAddr
	return m.GrpcBridge, nil
}

// Bridge returns the connection bridge; panics unless UseBridge was configured.
func (m *Member) Bridge() *bridge {
	if !m.UseBridge {
		m.Logger.Panic("Bridge not available. Please configure using bridge before creating Cluster.")
	}
	return m.GrpcBridge
}

// grpcAddr returns the network/host/port triple for the member's grpc endpoint.
// For unix sockets the "port" is the member name; for TCP it is a unique number.
func (m *Member) grpcAddr() (network, host, port string) {
	// prefix with localhost so cert has right domain
	host = "localhost"
	if m.UseIP { // for IP-only TLS certs
		host = "127.0.0.1"
	}
	network = "unix"
	if m.UseTCP {
		network = "tcp"
	}
	port = m.Name
	if m.UseTCP {
		port = fmt.Sprintf("%d", GrpcPortNumber(m.UniqNumber, m.MemberNumber))
	}
	return network, host, port
}

// GrpcPortNumber derives a unique TCP port from member counters.
func GrpcPortNumber(uniqNumber, memberNumber int) int {
	return BaseGRPCPort + uniqNumber*10 + memberNumber
}

// dialer dials a fixed network/address pair; used by the bridge.
type dialer struct {
	network string
	addr    string
}

func (d dialer) Dial() (net.Conn, error) {
	return net.Dial(d.network, d.addr)
}

// ElectionTimeout returns this member's configured election timeout duration.
func (m *Member) ElectionTimeout() time.Duration {
	return time.Duration(m.Server.Cfg.ElectionTicks*int(m.Server.Cfg.TickMs)) * time.Millisecond
}

// ID returns the member's raft ID.
func (m *Member) ID() types.ID { return m.Server.MemberId() }

// NewClientV3 creates a new grpc client connection to the member
func NewClientV3(m *Member) (*clientv3.Client, error) {
	if m.GrpcURL == "" {
		return nil, fmt.Errorf("member not configured for grpc")
	}

	cfg := clientv3.Config{
		Endpoints:          []string{m.GrpcURL},
		DialTimeout:        5 * time.Second,
		DialOptions:        []grpc.DialOption{grpc.WithBlock()},
		MaxCallSendMsgSize: m.ClientMaxCallSendMsgSize,
		MaxCallRecvMsgSize: m.ClientMaxCallRecvMsgSize,
		Logger:             m.Logger.Named("client"),
	}

	if m.ClientTLSInfo != nil {
		tls, err := m.ClientTLSInfo.ClientConfig()
		if err != nil {
			return nil, err
		}
		cfg.TLS = tls
	}
	if m.DialOptions != nil {
		cfg.DialOptions = append(cfg.DialOptions, m.DialOptions...)
	}
	return newClientV3(cfg)
}

// Clone returns a member with the same server configuration. The returned
// member will not set PeerListeners and ClientListeners.
func (m *Member) Clone(t testutil.TB) *Member {
	mm := &Member{}
	mm.ServerConfig = m.ServerConfig

	var err error
	clientURLStrs := m.ClientURLs.StringSlice()
	mm.ClientURLs, err = types.NewURLs(clientURLStrs)
	if err != nil {
		// this should never fail
		panic(err)
	}
	peerURLStrs := m.PeerURLs.StringSlice()
	mm.PeerURLs, err = types.NewURLs(peerURLStrs)
	if err != nil {
		// this should never fail
		panic(err)
	}
	clusterStr := m.InitialPeerURLsMap.String()
	mm.InitialPeerURLsMap, err = types.NewURLsMap(clusterStr)
	if err != nil {
		// this should never fail
		panic(err)
	}
	mm.InitialClusterToken = m.InitialClusterToken
	mm.ElectionTicks = m.ElectionTicks
	mm.PeerTLSInfo = m.PeerTLSInfo
	mm.ClientTLSInfo = m.ClientTLSInfo
	mm.Logger = memberLogger(t, mm.Name+"c")
	return mm
}

// Launch starts a member based on ServerConfig, PeerListeners
// and ClientListeners.
915 func (m *Member) Launch() error { 916 m.Logger.Info( 917 "launching a member", 918 zap.String("name", m.Name), 919 zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()), 920 zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), 921 zap.String("grpc-url", m.GrpcURL), 922 ) 923 var err error 924 if m.Server, err = etcdserver.NewServer(m.ServerConfig); err != nil { 925 return fmt.Errorf("failed to initialize the etcd server: %v", err) 926 } 927 m.Server.SyncTicker = time.NewTicker(500 * time.Millisecond) 928 m.Server.Start() 929 930 var peerTLScfg *tls.Config 931 if m.PeerTLSInfo != nil && !m.PeerTLSInfo.Empty() { 932 if peerTLScfg, err = m.PeerTLSInfo.ServerConfig(); err != nil { 933 return err 934 } 935 } 936 937 if m.GrpcListener != nil { 938 var ( 939 tlscfg *tls.Config 940 ) 941 if m.ClientTLSInfo != nil && !m.ClientTLSInfo.Empty() { 942 tlscfg, err = m.ClientTLSInfo.ServerConfig() 943 if err != nil { 944 return err 945 } 946 } 947 m.GrpcServer = v3rpc.Server(m.Server, tlscfg, m.GrpcServerRecorder.UnaryInterceptor(), m.GrpcServerOpts...) 
		// Back the lock/election gRPC services with a loopback client to this
		// member's own embedded server.
		m.ServerClient = v3client.New(m.Server)
		lockpb.RegisterLockServer(m.GrpcServer, v3lock.NewLockServer(m.ServerClient))
		epb.RegisterElectionServer(m.GrpcServer, v3election.NewElectionServer(m.ServerClient))
		// NOTE(review): Serve's error is intentionally dropped; the listener is
		// torn down on Close and failures surface through the tests themselves.
		go m.GrpcServer.Serve(m.GrpcListener)
	}

	m.RaftHandler = &testutil.PauseableHandler{Next: etcdhttp.NewPeerHandler(m.Logger, m.Server)}

	h := (http.Handler)(m.RaftHandler)
	if m.GrpcListener != nil {
		// Wrap in a closure so m.RaftHandler is dereferenced at request time,
		// not captured once here; a handler swapped later is still honored.
		h = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			m.RaftHandler.ServeHTTP(w, r)
		})
	}

	// Serve the peer (raft) endpoints; each listener gets its own cmux and
	// httptest server, plus a closer that tears the whole stack down.
	for _, ln := range m.PeerListeners {
		cm := cmux.New(ln)
		// don't hang on matcher after closing listener
		cm.SetReadTimeout(time.Second)

		// serve http1/http2 rafthttp/grpc
		ll := cm.Match(cmux.Any())
		if peerTLScfg != nil {
			if ll, err = transport.NewTLSListener(ll, m.PeerTLSInfo); err != nil {
				return err
			}
		}
		hs := &httptest.Server{
			Listener: ll,
			Config: &http.Server{
				Handler:   h,
				TLSConfig: peerTLScfg,
				ErrorLog:  log.New(io.Discard, "net/http", 0),
			},
			TLS: peerTLScfg,
		}
		hs.Start()

		donec := make(chan struct{})
		go func() {
			defer close(donec)
			cm.Serve()
		}()
		// closer unwinds in reverse construction order and waits for the
		// cmux serve goroutine to exit before returning.
		closer := func() {
			ll.Close()
			hs.CloseClientConnections()
			hs.Close()
			<-donec
		}
		m.ServerClosers = append(m.ServerClosers, closer)
	}
	// Serve the client-facing HTTP endpoints (debug/version/metrics/health).
	for _, ln := range m.ClientListeners {
		handler := http.NewServeMux()
		etcdhttp.HandleDebug(handler)
		etcdhttp.HandleVersion(handler, m.Server)
		etcdhttp.HandleMetrics(handler)
		etcdhttp.HandleHealth(m.Logger, handler, m.Server)
		hs := &httptest.Server{
			Listener: ln,
			Config: &http.Server{
				Handler:  handler,
				ErrorLog: log.New(io.Discard, "net/http", 0),
			},
		}
		if m.ClientTLSInfo == nil {
			hs.Start()
		} else {
			info := m.ClientTLSInfo
			hs.TLS, err = info.ServerConfig()
			if err != nil {
				return err
			}

			// baseConfig is called on initial TLS handshake start.
			//
			// Previously,
			// 1. Server has non-empty (*tls.Config).Certificates on client hello
			// 2. Server calls (*tls.Config).GetCertificate iff:
			//    - Server's (*tls.Config).Certificates is not empty, or
			//    - Client supplies SNI; non-empty (*tls.ClientHelloInfo).ServerName
			//
			// When (*tls.Config).Certificates is always populated on initial handshake,
			// client is expected to provide a valid matching SNI to pass the TLS
			// verification, thus trigger server (*tls.Config).GetCertificate to reload
			// TLS assets. However, a cert whose SAN field does not include domain names
			// but only IP addresses, has empty (*tls.ClientHelloInfo).ServerName, thus
			// it was never able to trigger TLS reload on initial handshake; first
			// certificate object was being used, never being updated.
			//
			// Now, (*tls.Config).Certificates is created empty on initial TLS client
			// handshake, in order to trigger (*tls.Config).GetCertificate and populate
			// rest of the certificates on every new TLS connection, even when client
			// SNI is empty (e.g. cert only includes IPs).
			//
			// This introduces another problem with "httptest.Server":
			// when server initial certificates are empty, certificates
			// are overwritten by Go's internal test certs, which have
			// different SAN fields (e.g. example.com). To work around,
			// re-overwrite (*tls.Config).Certificates before starting
			// test server.
1048 tlsCert, err := tlsutil.NewCert(info.CertFile, info.KeyFile, nil) 1049 if err != nil { 1050 return err 1051 } 1052 hs.TLS.Certificates = []tls.Certificate{*tlsCert} 1053 1054 hs.StartTLS() 1055 } 1056 closer := func() { 1057 ln.Close() 1058 hs.CloseClientConnections() 1059 hs.Close() 1060 } 1061 m.ServerClosers = append(m.ServerClosers, closer) 1062 } 1063 if m.GrpcURL != "" && m.Client == nil { 1064 m.Client, err = NewClientV3(m) 1065 if err != nil { 1066 return err 1067 } 1068 } 1069 1070 m.Logger.Info( 1071 "launched a member", 1072 zap.String("name", m.Name), 1073 zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()), 1074 zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), 1075 zap.String("grpc-url", m.GrpcURL), 1076 ) 1077 return nil 1078 } 1079 1080 func (m *Member) RecordedRequests() []grpc_testing.RequestInfo { 1081 return m.GrpcServerRecorder.RecordedRequests() 1082 } 1083 1084 func (m *Member) WaitOK(t testutil.TB) { 1085 m.WaitStarted(t) 1086 for m.Server.Leader() == 0 { 1087 time.Sleep(framecfg.TickDuration) 1088 } 1089 } 1090 1091 func (m *Member) WaitStarted(t testutil.TB) { 1092 for { 1093 ctx, cancel := context.WithTimeout(context.Background(), RequestTimeout) 1094 _, err := m.Client.Get(ctx, "/", clientv3.WithSerializable()) 1095 if err != nil { 1096 time.Sleep(framecfg.TickDuration) 1097 continue 1098 } 1099 cancel() 1100 break 1101 } 1102 } 1103 1104 func WaitClientV3(t testutil.TB, kv clientv3.KV) { 1105 WaitClientV3WithKey(t, kv, "/") 1106 } 1107 1108 func WaitClientV3WithKey(t testutil.TB, kv clientv3.KV, key string) { 1109 timeout := time.Now().Add(RequestTimeout) 1110 var err error 1111 for time.Now().Before(timeout) { 1112 ctx, cancel := context.WithTimeout(context.Background(), RequestTimeout) 1113 _, err = kv.Get(ctx, key) 1114 cancel() 1115 if err == nil { 1116 return 1117 } 1118 time.Sleep(framecfg.TickDuration) 1119 } 1120 if err != nil { 1121 t.Fatalf("timed out waiting for client: %v", err) 1122 } 1123 } 1124 
1125 func (m *Member) URL() string { return m.ClientURLs[0].String() } 1126 1127 func (m *Member) Pause() { 1128 m.RaftHandler.Pause() 1129 m.Server.PauseSending() 1130 } 1131 1132 func (m *Member) Resume() { 1133 m.RaftHandler.Resume() 1134 m.Server.ResumeSending() 1135 } 1136 1137 // Close stops the member'Server etcdserver and closes its connections 1138 func (m *Member) Close() { 1139 if m.GrpcBridge != nil { 1140 m.GrpcBridge.Close() 1141 m.GrpcBridge = nil 1142 } 1143 if m.ServerClient != nil { 1144 m.ServerClient.Close() 1145 m.ServerClient = nil 1146 } 1147 if m.GrpcServer != nil { 1148 ch := make(chan struct{}) 1149 go func() { 1150 defer close(ch) 1151 // close listeners to stop accepting new connections, 1152 // will block on any existing transports 1153 m.GrpcServer.GracefulStop() 1154 }() 1155 // wait until all pending RPCs are finished 1156 select { 1157 case <-ch: 1158 case <-time.After(2 * time.Second): 1159 // took too long, manually close open transports 1160 // e.g. watch streams 1161 m.GrpcServer.Stop() 1162 <-ch 1163 } 1164 m.GrpcServer = nil 1165 } 1166 if m.Server != nil { 1167 m.Server.HardStop() 1168 } 1169 for _, f := range m.ServerClosers { 1170 f() 1171 } 1172 if !m.Closed { 1173 // Avoid verification of the same file multiple times 1174 // (that might not exist any longer) 1175 verify.MustVerifyIfEnabled(verify.Config{ 1176 Logger: m.Logger, 1177 DataDir: m.DataDir, 1178 ExactIndex: false, 1179 }) 1180 } 1181 m.Closed = true 1182 } 1183 1184 // Stop stops the member, but the data dir of the member is preserved. 
1185 func (m *Member) Stop(_ testutil.TB) { 1186 m.Logger.Info( 1187 "stopping a member", 1188 zap.String("name", m.Name), 1189 zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()), 1190 zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), 1191 zap.String("grpc-url", m.GrpcURL), 1192 ) 1193 m.Close() 1194 m.ServerClosers = nil 1195 m.Logger.Info( 1196 "stopped a member", 1197 zap.String("name", m.Name), 1198 zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()), 1199 zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), 1200 zap.String("grpc-url", m.GrpcURL), 1201 ) 1202 } 1203 1204 // CheckLeaderTransition waits for leader transition, returning the new leader ID. 1205 func CheckLeaderTransition(m *Member, oldLead uint64) uint64 { 1206 interval := time.Duration(m.Server.Cfg.TickMs) * time.Millisecond 1207 for m.Server.Lead() == 0 || (m.Server.Lead() == oldLead) { 1208 time.Sleep(interval) 1209 } 1210 return m.Server.Lead() 1211 } 1212 1213 // StopNotify unblocks when a member stop completes 1214 func (m *Member) StopNotify() <-chan struct{} { 1215 return m.Server.StopNotify() 1216 } 1217 1218 // Restart starts the member using the preserved data dir. 
1219 func (m *Member) Restart(t testutil.TB) error { 1220 m.Logger.Info( 1221 "restarting a member", 1222 zap.String("name", m.Name), 1223 zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()), 1224 zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), 1225 zap.String("grpc-url", m.GrpcURL), 1226 ) 1227 newPeerListeners := make([]net.Listener, 0) 1228 for _, ln := range m.PeerListeners { 1229 newPeerListeners = append(newPeerListeners, NewListenerWithAddr(t, ln.Addr().String())) 1230 } 1231 m.PeerListeners = newPeerListeners 1232 newClientListeners := make([]net.Listener, 0) 1233 for _, ln := range m.ClientListeners { 1234 newClientListeners = append(newClientListeners, NewListenerWithAddr(t, ln.Addr().String())) 1235 } 1236 m.ClientListeners = newClientListeners 1237 1238 if m.GrpcListener != nil { 1239 if err := m.listenGRPC(); err != nil { 1240 t.Fatal(err) 1241 } 1242 } 1243 1244 err := m.Launch() 1245 m.Logger.Info( 1246 "restarted a member", 1247 zap.String("name", m.Name), 1248 zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()), 1249 zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), 1250 zap.String("grpc-url", m.GrpcURL), 1251 zap.Error(err), 1252 ) 1253 return err 1254 } 1255 1256 // Terminate stops the member and removes the data dir. 
1257 func (m *Member) Terminate(t testutil.TB) { 1258 m.Logger.Info( 1259 "terminating a member", 1260 zap.String("name", m.Name), 1261 zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()), 1262 zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), 1263 zap.String("grpc-url", m.GrpcURL), 1264 ) 1265 m.Close() 1266 if !m.KeepDataDirTerminate { 1267 if err := os.RemoveAll(m.ServerConfig.DataDir); err != nil { 1268 t.Fatal(err) 1269 } 1270 } 1271 m.Logger.Info( 1272 "terminated a member", 1273 zap.String("name", m.Name), 1274 zap.Strings("advertise-peer-urls", m.PeerURLs.StringSlice()), 1275 zap.Strings("listen-client-urls", m.ClientURLs.StringSlice()), 1276 zap.String("grpc-url", m.GrpcURL), 1277 ) 1278 } 1279 1280 // Metric gets the metric value for a member 1281 func (m *Member) Metric(metricName string, expectLabels ...string) (string, error) { 1282 cfgtls := transport.TLSInfo{} 1283 tr, err := transport.NewTimeoutTransport(cfgtls, time.Second, time.Second, time.Second) 1284 if err != nil { 1285 return "", err 1286 } 1287 cli := &http.Client{Transport: tr} 1288 resp, err := cli.Get(m.ClientURLs[0].String() + "/metrics") 1289 if err != nil { 1290 return "", err 1291 } 1292 defer resp.Body.Close() 1293 b, rerr := io.ReadAll(resp.Body) 1294 if rerr != nil { 1295 return "", rerr 1296 } 1297 lines := strings.Split(string(b), "\n") 1298 for _, l := range lines { 1299 if !strings.HasPrefix(l, metricName) { 1300 continue 1301 } 1302 ok := true 1303 for _, lv := range expectLabels { 1304 if !strings.Contains(l, lv) { 1305 ok = false 1306 break 1307 } 1308 } 1309 if !ok { 1310 continue 1311 } 1312 return strings.Split(l, " ")[1], nil 1313 } 1314 return "", nil 1315 } 1316 1317 // InjectPartition drops connections from m to others, vice versa. 
1318 func (m *Member) InjectPartition(t testutil.TB, others ...*Member) { 1319 for _, other := range others { 1320 m.Server.CutPeer(other.Server.MemberId()) 1321 other.Server.CutPeer(m.Server.MemberId()) 1322 t.Logf("network partition injected between: %v <-> %v", m.Server.MemberId(), other.Server.MemberId()) 1323 } 1324 } 1325 1326 // RecoverPartition recovers connections from m to others, vice versa. 1327 func (m *Member) RecoverPartition(t testutil.TB, others ...*Member) { 1328 for _, other := range others { 1329 m.Server.MendPeer(other.Server.MemberId()) 1330 other.Server.MendPeer(m.Server.MemberId()) 1331 t.Logf("network partition between: %v <-> %v", m.Server.MemberId(), other.Server.MemberId()) 1332 } 1333 } 1334 1335 func (m *Member) ReadyNotify() <-chan struct{} { 1336 return m.Server.ReadyNotify() 1337 } 1338 1339 type SortableMemberSliceByPeerURLs []*pb.Member 1340 1341 func (p SortableMemberSliceByPeerURLs) Len() int { return len(p) } 1342 func (p SortableMemberSliceByPeerURLs) Less(i, j int) bool { 1343 return p[i].PeerURLs[0] < p[j].PeerURLs[0] 1344 } 1345 func (p SortableMemberSliceByPeerURLs) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 1346 1347 // NewCluster returns a launched Cluster with a grpc client connection 1348 // for each Cluster member. 
1349 func NewCluster(t testutil.TB, cfg *ClusterConfig) *Cluster { 1350 t.Helper() 1351 1352 assertInTestContext(t) 1353 1354 testutil.SkipTestIfShortMode(t, "Cannot start etcd Cluster in --short tests") 1355 1356 c := &Cluster{Cfg: cfg} 1357 ms := make([]*Member, cfg.Size) 1358 for i := 0; i < cfg.Size; i++ { 1359 ms[i] = c.mustNewMember(t) 1360 } 1361 c.Members = ms 1362 if err := c.fillClusterForMembers(); err != nil { 1363 t.Fatalf("fillClusterForMembers failed: %v", err) 1364 } 1365 c.Launch(t) 1366 1367 return c 1368 } 1369 1370 func (c *Cluster) TakeClient(idx int) { 1371 c.mu.Lock() 1372 c.Members[idx].Client = nil 1373 c.mu.Unlock() 1374 } 1375 1376 func (c *Cluster) Terminate(t testutil.TB) { 1377 if t != nil { 1378 t.Logf("========= Cluster termination started =====================") 1379 } 1380 for _, m := range c.Members { 1381 if m.Client != nil { 1382 m.Client.Close() 1383 } 1384 } 1385 var wg sync.WaitGroup 1386 wg.Add(len(c.Members)) 1387 for _, m := range c.Members { 1388 go func(mm *Member) { 1389 defer wg.Done() 1390 mm.Terminate(t) 1391 }(m) 1392 } 1393 wg.Wait() 1394 if t != nil { 1395 t.Logf("========= Cluster termination succeeded ===================") 1396 } 1397 } 1398 1399 func (c *Cluster) RandClient() *clientv3.Client { 1400 return c.Members[rand.Intn(len(c.Members))].Client 1401 } 1402 1403 func (c *Cluster) Client(i int) *clientv3.Client { 1404 return c.Members[i].Client 1405 } 1406 1407 func (c *Cluster) Endpoints() []string { 1408 var endpoints []string 1409 for _, m := range c.Members { 1410 endpoints = append(endpoints, m.GrpcURL) 1411 } 1412 return endpoints 1413 } 1414 1415 func (c *Cluster) ClusterClient(t testing.TB, opts ...func(*clientv3.Config)) (client *clientv3.Client, err error) { 1416 cfg, err := c.newClientCfg() 1417 if err != nil { 1418 return nil, err 1419 } 1420 for _, opt := range opts { 1421 opt(cfg) 1422 } 1423 client, err = newClientV3(*cfg) 1424 if err != nil { 1425 return nil, err 1426 } 1427 t.Cleanup(func() 
{ 1428 client.Close() 1429 }) 1430 return client, nil 1431 } 1432 1433 func (c *Cluster) newClientCfg() (*clientv3.Config, error) { 1434 cfg := &clientv3.Config{ 1435 Endpoints: c.Endpoints(), 1436 DialTimeout: 5 * time.Second, 1437 DialOptions: []grpc.DialOption{grpc.WithBlock()}, 1438 MaxCallSendMsgSize: c.Cfg.ClientMaxCallSendMsgSize, 1439 MaxCallRecvMsgSize: c.Cfg.ClientMaxCallRecvMsgSize, 1440 } 1441 if c.Cfg.ClientTLS != nil { 1442 tls, err := c.Cfg.ClientTLS.ClientConfig() 1443 if err != nil { 1444 return nil, err 1445 } 1446 cfg.TLS = tls 1447 } 1448 return cfg, nil 1449 } 1450 1451 // NewClientV3 creates a new grpc client connection to the member 1452 func (c *Cluster) NewClientV3(memberIndex int) (*clientv3.Client, error) { 1453 return NewClientV3(c.Members[memberIndex]) 1454 } 1455 1456 func makeClients(t testutil.TB, clus *Cluster, clients *[]*clientv3.Client, chooseMemberIndex func() int) func() *clientv3.Client { 1457 var mu sync.Mutex 1458 *clients = nil 1459 return func() *clientv3.Client { 1460 cli, err := clus.NewClientV3(chooseMemberIndex()) 1461 if err != nil { 1462 t.Fatalf("cannot create client: %v", err) 1463 } 1464 mu.Lock() 1465 *clients = append(*clients, cli) 1466 mu.Unlock() 1467 return cli 1468 } 1469 } 1470 1471 // MakeSingleNodeClients creates factory of clients that all connect to member 0. 1472 // All the created clients are put on the 'clients' list. The factory is thread-safe. 1473 func MakeSingleNodeClients(t testutil.TB, clus *Cluster, clients *[]*clientv3.Client) func() *clientv3.Client { 1474 return makeClients(t, clus, clients, func() int { return 0 }) 1475 } 1476 1477 // MakeMultiNodeClients creates factory of clients that all connect to random members. 1478 // All the created clients are put on the 'clients' list. The factory is thread-safe. 
1479 func MakeMultiNodeClients(t testutil.TB, clus *Cluster, clients *[]*clientv3.Client) func() *clientv3.Client { 1480 return makeClients(t, clus, clients, func() int { return rand.Intn(len(clus.Members)) }) 1481 } 1482 1483 // CloseClients closes all the clients from the 'clients' list. 1484 func CloseClients(t testutil.TB, clients []*clientv3.Client) { 1485 for _, cli := range clients { 1486 if err := cli.Close(); err != nil { 1487 t.Fatal(err) 1488 } 1489 } 1490 } 1491 1492 type GrpcAPI struct { 1493 // Cluster is the Cluster API for the client'Server connection. 1494 Cluster pb.ClusterClient 1495 // KV is the keyvalue API for the client'Server connection. 1496 KV pb.KVClient 1497 // Lease is the lease API for the client'Server connection. 1498 Lease pb.LeaseClient 1499 // Watch is the watch API for the client'Server connection. 1500 Watch pb.WatchClient 1501 // Maintenance is the maintenance API for the client'Server connection. 1502 Maintenance pb.MaintenanceClient 1503 // Auth is the authentication API for the client'Server connection. 1504 Auth pb.AuthClient 1505 // Lock is the lock API for the client'Server connection. 1506 Lock lockpb.LockClient 1507 // Election is the election API for the client'Server connection. 1508 Election epb.ElectionClient 1509 } 1510 1511 // GetLearnerMembers returns the list of learner members in Cluster using MemberList API. 1512 func (c *Cluster) GetLearnerMembers() ([]*pb.Member, error) { 1513 cli := c.Client(0) 1514 resp, err := cli.MemberList(context.Background()) 1515 if err != nil { 1516 return nil, fmt.Errorf("failed to list member %v", err) 1517 } 1518 var learners []*pb.Member 1519 for _, m := range resp.Members { 1520 if m.IsLearner { 1521 learners = append(learners, m) 1522 } 1523 } 1524 return learners, nil 1525 } 1526 1527 // AddAndLaunchLearnerMember creates a leaner member, adds it to Cluster 1528 // via v3 MemberAdd API, and then launches the new member. 
1529 func (c *Cluster) AddAndLaunchLearnerMember(t testutil.TB) { 1530 m := c.mustNewMember(t) 1531 m.IsLearner = true 1532 1533 scheme := SchemeFromTLSInfo(c.Cfg.PeerTLS) 1534 peerURLs := []string{scheme + "://" + m.PeerListeners[0].Addr().String()} 1535 1536 cli := c.Client(0) 1537 _, err := cli.MemberAddAsLearner(context.Background(), peerURLs) 1538 if err != nil { 1539 t.Fatalf("failed to add learner member %v", err) 1540 } 1541 1542 m.InitialPeerURLsMap = types.URLsMap{} 1543 for _, mm := range c.Members { 1544 m.InitialPeerURLsMap[mm.Name] = mm.PeerURLs 1545 } 1546 m.InitialPeerURLsMap[m.Name] = m.PeerURLs 1547 m.NewCluster = false 1548 1549 if err := m.Launch(); err != nil { 1550 t.Fatal(err) 1551 } 1552 1553 c.Members = append(c.Members, m) 1554 1555 c.waitMembersMatch(t) 1556 } 1557 1558 // getMembers returns a list of members in Cluster, in format of etcdserverpb.Member 1559 func (c *Cluster) getMembers() []*pb.Member { 1560 var mems []*pb.Member 1561 for _, m := range c.Members { 1562 mem := &pb.Member{ 1563 Name: m.Name, 1564 PeerURLs: m.PeerURLs.StringSlice(), 1565 ClientURLs: m.ClientURLs.StringSlice(), 1566 IsLearner: m.IsLearner, 1567 } 1568 mems = append(mems, mem) 1569 } 1570 return mems 1571 } 1572 1573 // waitMembersMatch waits until v3rpc MemberList returns the 'same' members info as the 1574 // local 'c.Members', which is the local recording of members in the testing Cluster. With 1575 // the exception that the local recording c.Members does not have info on Member.ID, which 1576 // is generated when the member is been added to Cluster. 1577 // 1578 // Note: 1579 // A successful match means the Member.clientURLs are matched. This means member has already 1580 // finished publishing its server attributes to Cluster. Publishing attributes is a Cluster-wide 1581 // write request (in v2 server). Therefore, at this point, any raft log entries prior to this 1582 // would have already been applied. 
1583 // 1584 // If a new member was added to an existing Cluster, at this point, it has finished publishing 1585 // its own server attributes to the Cluster. And therefore by the same argument, it has already 1586 // applied the raft log entries (especially those of type raftpb.ConfChangeType). At this point, 1587 // the new member has the correct view of the Cluster configuration. 1588 // 1589 // Special note on learner member: 1590 // Learner member is only added to a Cluster via v3rpc MemberAdd API (as of v3.4). When starting 1591 // the learner member, its initial view of the Cluster created by peerURLs map does not have info 1592 // on whether or not the new member itself is learner. But at this point, a successful match does 1593 // indicate that the new learner member has applied the raftpb.ConfChangeAddLearnerNode entry 1594 // which was used to add the learner itself to the Cluster, and therefore it has the correct info 1595 // on learner. 1596 func (c *Cluster) waitMembersMatch(t testutil.TB) { 1597 wMembers := c.getMembers() 1598 sort.Sort(SortableProtoMemberSliceByPeerURLs(wMembers)) 1599 cli := c.Client(0) 1600 for { 1601 resp, err := cli.MemberList(context.Background()) 1602 if err != nil { 1603 t.Fatalf("failed to list member %v", err) 1604 } 1605 1606 if len(resp.Members) != len(wMembers) { 1607 continue 1608 } 1609 sort.Sort(SortableProtoMemberSliceByPeerURLs(resp.Members)) 1610 for _, m := range resp.Members { 1611 m.ID = 0 1612 } 1613 if reflect.DeepEqual(resp.Members, wMembers) { 1614 return 1615 } 1616 1617 time.Sleep(framecfg.TickDuration) 1618 } 1619 } 1620 1621 type SortableProtoMemberSliceByPeerURLs []*pb.Member 1622 1623 func (p SortableProtoMemberSliceByPeerURLs) Len() int { return len(p) } 1624 func (p SortableProtoMemberSliceByPeerURLs) Less(i, j int) bool { 1625 return p[i].PeerURLs[0] < p[j].PeerURLs[0] 1626 } 1627 func (p SortableProtoMemberSliceByPeerURLs) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 1628 1629 // MustNewMember creates 
a new member instance based on the response of V3 Member Add API. 1630 func (c *Cluster) MustNewMember(t testutil.TB, resp *clientv3.MemberAddResponse) *Member { 1631 m := c.mustNewMember(t) 1632 m.IsLearner = resp.Member.IsLearner 1633 m.NewCluster = false 1634 1635 m.InitialPeerURLsMap = types.URLsMap{} 1636 for _, mm := range c.Members { 1637 m.InitialPeerURLsMap[mm.Name] = mm.PeerURLs 1638 } 1639 m.InitialPeerURLsMap[m.Name] = types.MustNewURLs(resp.Member.PeerURLs) 1640 c.Members = append(c.Members, m) 1641 return m 1642 }