github.com/lfch/etcd-io/tests/v3@v3.0.0-20221004140520-eac99acd3e9d/functional/tester/case_sigquit_remove.go (about)

     1  // Copyright 2018 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tester
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"sort"
    21  	"strings"
    22  	"time"
    23  
    24  	clientv3 "github.com/lfch/etcd-io/client/v3"
    25  	"github.com/lfch/etcd-io/tests/v3/functional/rpcpb"
    26  
    27  	"go.uber.org/zap"
    28  )
    29  
    30  func inject_SIGQUIT_ETCD_AND_REMOVE_DATA(clus *Cluster, idx1 int) error {
    31  	cli1, err := clus.Members[idx1].CreateEtcdClient()
    32  	if err != nil {
    33  		return err
    34  	}
    35  	defer cli1.Close()
    36  
    37  	var mresp *clientv3.MemberListResponse
    38  	mresp, err = cli1.MemberList(context.Background())
    39  	var mss []string
    40  	if err == nil && mresp != nil {
    41  		mss = describeMembers(mresp)
    42  	}
    43  	clus.lg.Info(
    44  		"member list before disastrous machine failure",
    45  		zap.String("request-to", clus.Members[idx1].EtcdClientEndpoint),
    46  		zap.Strings("members", mss),
    47  		zap.Error(err),
    48  	)
    49  	if err != nil {
    50  		return err
    51  	}
    52  
    53  	sresp, serr := cli1.Status(context.Background(), clus.Members[idx1].EtcdClientEndpoint)
    54  	if serr != nil {
    55  		return serr
    56  	}
    57  	id1 := sresp.Header.MemberId
    58  	is1 := fmt.Sprintf("%016x", id1)
    59  
    60  	clus.lg.Info(
    61  		"disastrous machine failure START",
    62  		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
    63  		zap.String("target-member-id", is1),
    64  		zap.Error(err),
    65  	)
    66  	err = clus.sendOp(idx1, rpcpb.Operation_SIGQUIT_ETCD_AND_REMOVE_DATA)
    67  	clus.lg.Info(
    68  		"disastrous machine failure END",
    69  		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
    70  		zap.String("target-member-id", is1),
    71  		zap.Error(err),
    72  	)
    73  	if err != nil {
    74  		return err
    75  	}
    76  
    77  	time.Sleep(2 * time.Second)
    78  
    79  	idx2 := (idx1 + 1) % len(clus.Members)
    80  	var cli2 *clientv3.Client
    81  	cli2, err = clus.Members[idx2].CreateEtcdClient()
    82  	if err != nil {
    83  		return err
    84  	}
    85  	defer cli2.Close()
    86  
    87  	// FIXME(bug): this may block forever during
    88  	// "SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT"
    89  	// is the new leader too busy with snapshotting?
    90  	// is raft proposal dropped?
    91  	// enable client keepalive for failover?
    92  	clus.lg.Info(
    93  		"member remove after disaster START",
    94  		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
    95  		zap.String("target-member-id", is1),
    96  		zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
    97  	)
    98  	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
    99  	_, err = cli2.MemberRemove(ctx, id1)
   100  	cancel()
   101  	clus.lg.Info(
   102  		"member remove after disaster END",
   103  		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
   104  		zap.String("target-member-id", is1),
   105  		zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
   106  		zap.Error(err),
   107  	)
   108  	if err != nil {
   109  		return err
   110  	}
   111  
   112  	time.Sleep(2 * time.Second)
   113  
   114  	mresp, err = cli2.MemberList(context.Background())
   115  	mss = []string{}
   116  	if err == nil && mresp != nil {
   117  		mss = describeMembers(mresp)
   118  	}
   119  	clus.lg.Info(
   120  		"member list after member remove",
   121  		zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
   122  		zap.Strings("members", mss),
   123  		zap.Error(err),
   124  	)
   125  	return err
   126  }
   127  
   128  func recover_SIGQUIT_ETCD_AND_REMOVE_DATA(clus *Cluster, idx1 int) error {
   129  	idx2 := (idx1 + 1) % len(clus.Members)
   130  	cli2, err := clus.Members[idx2].CreateEtcdClient()
   131  	if err != nil {
   132  		return err
   133  	}
   134  	defer cli2.Close()
   135  
   136  	_, err = cli2.MemberAdd(context.Background(), clus.Members[idx1].Etcd.AdvertisePeerURLs)
   137  	clus.lg.Info(
   138  		"member add before fresh restart",
   139  		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
   140  		zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
   141  		zap.Error(err),
   142  	)
   143  	if err != nil {
   144  		return err
   145  	}
   146  
   147  	time.Sleep(2 * time.Second)
   148  
   149  	clus.Members[idx1].Etcd.InitialClusterState = "existing"
   150  	err = clus.sendOp(idx1, rpcpb.Operation_RESTART_ETCD)
   151  	clus.lg.Info(
   152  		"fresh restart after member add",
   153  		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
   154  		zap.Error(err),
   155  	)
   156  	if err != nil {
   157  		return err
   158  	}
   159  
   160  	time.Sleep(2 * time.Second)
   161  
   162  	var mresp *clientv3.MemberListResponse
   163  	mresp, err = cli2.MemberList(context.Background())
   164  	var mss []string
   165  	if err == nil && mresp != nil {
   166  		mss = describeMembers(mresp)
   167  	}
   168  	clus.lg.Info(
   169  		"member list after member add",
   170  		zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
   171  		zap.Strings("members", mss),
   172  		zap.Error(err),
   173  	)
   174  	return err
   175  }
   176  
   177  func new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER(clus *Cluster) Case {
   178  	cc := caseByFunc{
   179  		rpcpbCase:     rpcpb.Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER,
   180  		injectMember:  inject_SIGQUIT_ETCD_AND_REMOVE_DATA,
   181  		recoverMember: recover_SIGQUIT_ETCD_AND_REMOVE_DATA,
   182  	}
   183  	c := &caseFollower{cc, -1, -1}
   184  	return &caseDelay{
   185  		Case:          c,
   186  		delayDuration: clus.GetCaseDelayDuration(),
   187  	}
   188  }
   189  
   190  func new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster) Case {
   191  	return &caseUntilSnapshot{
   192  		rpcpbCase: rpcpb.Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT,
   193  		Case:      new_Case_SIGQUIT_AND_REMOVE_ONE_FOLLOWER(clus),
   194  	}
   195  }
   196  
   197  func new_Case_SIGQUIT_AND_REMOVE_LEADER(clus *Cluster) Case {
   198  	cc := caseByFunc{
   199  		rpcpbCase:     rpcpb.Case_SIGQUIT_AND_REMOVE_LEADER,
   200  		injectMember:  inject_SIGQUIT_ETCD_AND_REMOVE_DATA,
   201  		recoverMember: recover_SIGQUIT_ETCD_AND_REMOVE_DATA,
   202  	}
   203  	c := &caseLeader{cc, -1, -1}
   204  	return &caseDelay{
   205  		Case:          c,
   206  		delayDuration: clus.GetCaseDelayDuration(),
   207  	}
   208  }
   209  
   210  func new_Case_SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT(clus *Cluster) Case {
   211  	return &caseUntilSnapshot{
   212  		rpcpbCase: rpcpb.Case_SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT,
   213  		Case:      new_Case_SIGQUIT_AND_REMOVE_LEADER(clus),
   214  	}
   215  }
   216  
   217  func describeMembers(mresp *clientv3.MemberListResponse) (ss []string) {
   218  	ss = make([]string, len(mresp.Members))
   219  	for i, m := range mresp.Members {
   220  		ss[i] = fmt.Sprintf("Name %s / ID %016x / ClientURLs %s / PeerURLs %s",
   221  			m.Name,
   222  			m.ID,
   223  			strings.Join(m.ClientURLs, ","),
   224  			strings.Join(m.PeerURLs, ","),
   225  		)
   226  	}
   227  	sort.Strings(ss)
   228  	return ss
   229  }