github.com/lfch/etcd-io/tests/v3@v3.0.0-20221004140520-eac99acd3e9d/integration/v3_watch_restore_test.go (about)

     1  // Copyright 2018 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package integration
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"testing"
    21  	"time"
    22  
    23  	pb "github.com/lfch/etcd-io/api/v3/etcdserverpb"
    24  	"github.com/lfch/etcd-io/tests/v3/framework/config"
    25  	"github.com/lfch/etcd-io/tests/v3/framework/integration"
    26  )
    27  
    28  // MustFetchNotEmptyMetric attempts to fetch given 'metric' from 'member',
    29  // waiting for not-empty value or 'timeout'.
    30  func MustFetchNotEmptyMetric(tb testing.TB, member *integration.Member, metric string, timeout <-chan time.Time) string {
    31  	metricValue := ""
    32  	tick := time.Tick(config.TickDuration)
    33  	for metricValue == "" {
    34  		tb.Logf("Waiting for metric: %v", metric)
    35  		select {
    36  		case <-timeout:
    37  			tb.Fatalf("Failed to fetch metric %v", metric)
    38  			return ""
    39  		case <-tick:
    40  			var err error
    41  			metricValue, err = member.Metric(metric)
    42  			if err != nil {
    43  				tb.Fatal(err)
    44  			}
    45  		}
    46  	}
    47  	return metricValue
    48  }
    49  
    50  // TestV3WatchRestoreSnapshotUnsync tests whether slow follower can restore
    51  // from leader snapshot, and still notify on watchers from an old revision
    52  // that were created in synced watcher group in the first place.
    53  // TODO: fix panic with gRPC proxy "panic: watcher current revision should not exceed current revision"
    54  func TestV3WatchRestoreSnapshotUnsync(t *testing.T) {
    55  	integration.BeforeTest(t)
    56  
    57  	clus := integration.NewCluster(t, &integration.ClusterConfig{
    58  		Size:                   3,
    59  		SnapshotCount:          10,
    60  		SnapshotCatchUpEntries: 5,
    61  	})
    62  	defer clus.Terminate(t)
    63  
    64  	// spawn a watcher before shutdown, and put it in synced watcher
    65  	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
    66  	defer cancel()
    67  	wStream, errW := integration.ToGRPC(clus.Client(0)).Watch.Watch(ctx)
    68  	if errW != nil {
    69  		t.Fatal(errW)
    70  	}
    71  	if err := wStream.Send(&pb.WatchRequest{RequestUnion: &pb.WatchRequest_CreateRequest{
    72  		CreateRequest: &pb.WatchCreateRequest{Key: []byte("foo"), StartRevision: 5}}}); err != nil {
    73  		t.Fatalf("wStream.Send error: %v", err)
    74  	}
    75  	wresp, errR := wStream.Recv()
    76  	if errR != nil {
    77  		t.Errorf("wStream.Recv error: %v", errR)
    78  	}
    79  	if !wresp.Created {
    80  		t.Errorf("wresp.Created got = %v, want = true", wresp.Created)
    81  	}
    82  
    83  	clus.Members[0].InjectPartition(t, clus.Members[1:]...)
    84  	initialLead := clus.WaitMembersForLeader(t, clus.Members[1:])
    85  	t.Logf("elected lead: %v", clus.Members[initialLead].Server.MemberId())
    86  	t.Logf("sleeping for 2 seconds")
    87  	time.Sleep(2 * time.Second)
    88  	t.Logf("sleeping for 2 seconds DONE")
    89  
    90  	kvc := integration.ToGRPC(clus.Client(1)).KV
    91  
    92  	// to trigger snapshot from the leader to the stopped follower
    93  	for i := 0; i < 15; i++ {
    94  		_, err := kvc.Put(context.TODO(), &pb.PutRequest{Key: []byte("foo"), Value: []byte("bar")})
    95  		if err != nil {
    96  			t.Errorf("#%d: couldn't put key (%v)", i, err)
    97  		}
    98  	}
    99  
   100  	// trigger snapshot send from leader to this slow follower
   101  	// which then calls watchable store Restore
   102  	clus.Members[0].RecoverPartition(t, clus.Members[1:]...)
   103  	// We don't expect leadership change here, just recompute the leader'Server index
   104  	// within clus.Members list.
   105  	lead := clus.WaitLeader(t)
   106  
   107  	// Sending is scheduled on fifo 'sched' within EtcdServer::run,
   108  	// so it can start delayed after recovery.
   109  	send := MustFetchNotEmptyMetric(t, clus.Members[lead],
   110  		"etcd_network_snapshot_send_inflights_total",
   111  		time.After(5*time.Second))
   112  
   113  	if send != "0" && send != "1" {
   114  		// 0 if already sent, 1 if sending
   115  		t.Fatalf("inflight snapshot snapshot_send_inflights_total expected 0 or 1, got %q", send)
   116  	}
   117  
   118  	receives := MustFetchNotEmptyMetric(t, clus.Members[(lead+1)%3],
   119  		"etcd_network_snapshot_receive_inflights_total",
   120  		time.After(5*time.Second))
   121  	if receives != "0" && receives != "1" {
   122  		// 0 if already received, 1 if receiving
   123  		t.Fatalf("inflight snapshot receives expected 0 or 1, got %q", receives)
   124  	}
   125  
   126  	t.Logf("sleeping for 2 seconds")
   127  	time.Sleep(2 * time.Second)
   128  	t.Logf("sleeping for 2 seconds DONE")
   129  
   130  	// slow follower now applies leader snapshot
   131  	// should be able to notify on old-revision watchers in unsynced
   132  	// make sure restore watch operation correctly moves watchers
   133  	// between synced and unsynced watchers
   134  	errc := make(chan error, 1)
   135  	go func() {
   136  		cresp, cerr := wStream.Recv()
   137  		if cerr != nil {
   138  			errc <- cerr
   139  			return
   140  		}
   141  		// from start revision 5 to latest revision 16
   142  		if len(cresp.Events) != 12 {
   143  			errc <- fmt.Errorf("expected 12 events, got %+v", cresp.Events)
   144  			return
   145  		}
   146  		errc <- nil
   147  	}()
   148  	select {
   149  	case <-time.After(10 * time.Second):
   150  		t.Fatal("took too long to receive events from restored watcher")
   151  	case err := <-errc:
   152  		if err != nil {
   153  			t.Fatalf("wStream.Recv error: %v", err)
   154  		}
   155  	}
   156  }