github.com/lfch/etcd-io/tests/v3@v3.0.0-20221004140520-eac99acd3e9d/integration/v3_watch_restore_test.go (about) 1 // Copyright 2018 The etcd Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package integration 16 17 import ( 18 "context" 19 "fmt" 20 "testing" 21 "time" 22 23 pb "github.com/lfch/etcd-io/api/v3/etcdserverpb" 24 "github.com/lfch/etcd-io/tests/v3/framework/config" 25 "github.com/lfch/etcd-io/tests/v3/framework/integration" 26 ) 27 28 // MustFetchNotEmptyMetric attempts to fetch given 'metric' from 'member', 29 // waiting for not-empty value or 'timeout'. 30 func MustFetchNotEmptyMetric(tb testing.TB, member *integration.Member, metric string, timeout <-chan time.Time) string { 31 metricValue := "" 32 tick := time.Tick(config.TickDuration) 33 for metricValue == "" { 34 tb.Logf("Waiting for metric: %v", metric) 35 select { 36 case <-timeout: 37 tb.Fatalf("Failed to fetch metric %v", metric) 38 return "" 39 case <-tick: 40 var err error 41 metricValue, err = member.Metric(metric) 42 if err != nil { 43 tb.Fatal(err) 44 } 45 } 46 } 47 return metricValue 48 } 49 50 // TestV3WatchRestoreSnapshotUnsync tests whether slow follower can restore 51 // from leader snapshot, and still notify on watchers from an old revision 52 // that were created in synced watcher group in the first place. 53 // TODO: fix panic with gRPC proxy "panic: watcher current revision should not exceed current revision" 54 func TestV3WatchRestoreSnapshotUnsync(t *testing.T) { 55 integration.BeforeTest(t) 56 57 clus := integration.NewCluster(t, &integration.ClusterConfig{ 58 Size: 3, 59 SnapshotCount: 10, 60 SnapshotCatchUpEntries: 5, 61 }) 62 defer clus.Terminate(t) 63 64 // spawn a watcher before shutdown, and put it in synced watcher 65 ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) 66 defer cancel() 67 wStream, errW := integration.ToGRPC(clus.Client(0)).Watch.Watch(ctx) 68 if errW != nil { 69 t.Fatal(errW) 70 } 71 if err := wStream.Send(&pb.WatchRequest{RequestUnion: &pb.WatchRequest_CreateRequest{ 72 CreateRequest: &pb.WatchCreateRequest{Key: []byte("foo"), StartRevision: 5}}}); err != nil { 73 t.Fatalf("wStream.Send error: %v", err) 74 } 75 wresp, errR := wStream.Recv() 76 if errR != nil { 77 t.Errorf("wStream.Recv error: %v", errR) 78 } 79 if !wresp.Created { 80 t.Errorf("wresp.Created got = %v, want = true", wresp.Created) 81 } 82 83 clus.Members[0].InjectPartition(t, clus.Members[1:]...) 84 initialLead := clus.WaitMembersForLeader(t, clus.Members[1:]) 85 t.Logf("elected lead: %v", clus.Members[initialLead].Server.MemberId()) 86 t.Logf("sleeping for 2 seconds") 87 time.Sleep(2 * time.Second) 88 t.Logf("sleeping for 2 seconds DONE") 89 90 kvc := integration.ToGRPC(clus.Client(1)).KV 91 92 // to trigger snapshot from the leader to the stopped follower 93 for i := 0; i < 15; i++ { 94 _, err := kvc.Put(context.TODO(), &pb.PutRequest{Key: []byte("foo"), Value: []byte("bar")}) 95 if err != nil { 96 t.Errorf("#%d: couldn't put key (%v)", i, err) 97 } 98 } 99 100 // trigger snapshot send from leader to this slow follower 101 // which then calls watchable store Restore 102 clus.Members[0].RecoverPartition(t, clus.Members[1:]...) 103 // We don't expect leadership change here, just recompute the leader'Server index 104 // within clus.Members list. 105 lead := clus.WaitLeader(t) 106 107 // Sending is scheduled on fifo 'sched' within EtcdServer::run, 108 // so it can start delayed after recovery. 109 send := MustFetchNotEmptyMetric(t, clus.Members[lead], 110 "etcd_network_snapshot_send_inflights_total", 111 time.After(5*time.Second)) 112 113 if send != "0" && send != "1" { 114 // 0 if already sent, 1 if sending 115 t.Fatalf("inflight snapshot snapshot_send_inflights_total expected 0 or 1, got %q", send) 116 } 117 118 receives := MustFetchNotEmptyMetric(t, clus.Members[(lead+1)%3], 119 "etcd_network_snapshot_receive_inflights_total", 120 time.After(5*time.Second)) 121 if receives != "0" && receives != "1" { 122 // 0 if already received, 1 if receiving 123 t.Fatalf("inflight snapshot receives expected 0 or 1, got %q", receives) 124 } 125 126 t.Logf("sleeping for 2 seconds") 127 time.Sleep(2 * time.Second) 128 t.Logf("sleeping for 2 seconds DONE") 129 130 // slow follower now applies leader snapshot 131 // should be able to notify on old-revision watchers in unsynced 132 // make sure restore watch operation correctly moves watchers 133 // between synced and unsynced watchers 134 errc := make(chan error, 1) 135 go func() { 136 cresp, cerr := wStream.Recv() 137 if cerr != nil { 138 errc <- cerr 139 return 140 } 141 // from start revision 5 to latest revision 16 142 if len(cresp.Events) != 12 { 143 errc <- fmt.Errorf("expected 12 events, got %+v", cresp.Events) 144 return 145 } 146 errc <- nil 147 }() 148 select { 149 case <-time.After(10 * time.Second): 150 t.Fatal("took too long to receive events from restored watcher") 151 case err := <-errc: 152 if err != nil { 153 t.Fatalf("wStream.Recv error: %v", err) 154 } 155 } 156 }