github.com/jpmorganchase/quorum@v21.1.0+incompatible/raft/handler_test.go

package raft

import (
	"crypto/ecdsa"
	"encoding/binary"
	"fmt"
	"io/ioutil"
	"net"
	"os"
	"reflect"
	"testing"
	"time"
	"unsafe"

	"github.com/coreos/etcd/wal"
	"github.com/coreos/etcd/wal/walpb"
	"github.com/ethereum/go-ethereum/core"
	"github.com/ethereum/go-ethereum/crypto"
	"github.com/ethereum/go-ethereum/eth"
	"github.com/ethereum/go-ethereum/event"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/node"
	"github.com/ethereum/go-ethereum/p2p"
	"github.com/ethereum/go-ethereum/p2p/enode"
	"github.com/ethereum/go-ethereum/params"
)

// pm.advanceAppliedIndex() and state updates are in different
// transaction boundaries, hence there's a probability that they are
// out of sync due to a premature shutdown.
func TestProtocolManager_whenAppliedIndexOutOfSync(t *testing.T) {
	logger := log.New()
	logger.SetHandler(log.StreamHandler(os.Stdout, log.TerminalFormat(false)))
	tmpWorkingDir, err := ioutil.TempDir("", "")
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		_ = os.RemoveAll(tmpWorkingDir)
	}()
	count := 3
	ports := make([]uint16, count)
	nodeKeys := make([]*ecdsa.PrivateKey, count)
	peers := make([]*enode.Node, count)
	for i := 0; i < count; i++ {
		ports[i] = nextPort(t)
		nodeKeys[i] = mustNewNodeKey(t)
		peers[i] = enode.NewV4Hostname(&(nodeKeys[i].PublicKey), net.IPv4(127, 0, 0, 1).String(), 0, 0, int(ports[i]))
	}
	raftNodes := make([]*RaftService, count)
	for i := 0; i < count; i++ {
		if s, err := startRaftNode(uint16(i+1), ports[i], tmpWorkingDir, nodeKeys[i], peers); err != nil {
			t.Fatal(err)
		} else {
			raftNodes[i] = s
		}
	}
	// wait until one of the nodes has taken on the minter role
	waitFunc := func() {
		for {
			time.Sleep(10 * time.Millisecond)
			for i := 0; i < count; i++ {
				if raftNodes[i].raftProtocolManager.role == minterRole {
					return
				}
			}
		}
	}
	waitFunc()
	logger.Debug("stop the cluster")
	for i := 0; i < count; i++ {
		if err := raftNodes[i].Stop(); err != nil {
			t.Fatal(err)
		}
		// somehow the wal dir is still being locked, which causes failures in the subsequent start;
		// we need to check here to make sure everything is fully stopped
		for isWalDirStillLocked(fmt.Sprintf("%s/node%d/raft-wal", tmpWorkingDir, i+1)) {
			logger.Debug("sleep...", "i", i)
			time.Sleep(10 * time.Millisecond)
		}
		logger.Debug("node stopped", "id", i)
	}
	logger.Debug("update applied index")
	// update the index to mimic the issue (set applied index behind for node 0)
	if err := writeAppliedIndex(tmpWorkingDir, 0, 1); err != nil {
		t.Fatal(err)
	}
	//time.Sleep(3 * time.Second)
	logger.Debug("restart the cluster")
	for i := 0; i < count; i++ {
		if s, err := startRaftNode(uint16(i+1), ports[i], tmpWorkingDir, nodeKeys[i], peers); err != nil {
			t.Fatal(err)
		} else {
			raftNodes[i] = s
		}
	}
	waitFunc()
}

// isWalDirStillLocked reports whether the WAL directory is still held by a
// previous instance: wal.Open fails while the lock is in place.
func isWalDirStillLocked(walDir string) bool {
	var snap walpb.Snapshot
	w, err := wal.Open(walDir, snap)
	if err != nil {
		return true
	}
	defer func() {
		_ = w.Close()
	}()
	return false
}

// writeAppliedIndex overwrites the persisted applied index of the given node
// to simulate a state store that lags behind the WAL.
func writeAppliedIndex(workingDir string, node int, index uint64) error {
	db, err := openQuorumRaftDb(fmt.Sprintf("%s/node%d/quorum-raft-state", workingDir, node+1))
	if err != nil {
		return err
	}
	defer func() {
		_ = db.Close()
	}()
	buf := make([]byte, 8)
	binary.LittleEndian.PutUint64(buf, index)
	return db.Put(appliedDbKey, buf, noFsync)
}

func mustNewNodeKey(t *testing.T) *ecdsa.PrivateKey {
	k, err := crypto.GenerateKey()
	if err != nil {
		t.Fatal(err)
	}
	return k
}

// nextPort asks the OS for a free TCP port to assign to a test node.
func nextPort(t *testing.T) uint16 {
	listener, err := net.Listen("tcp", ":0")
	if err != nil {
		t.Fatal(err)
	}
	return uint16(listener.Addr().(*net.TCPAddr).Port)
}

func prepareServiceContext(key *ecdsa.PrivateKey) (ctx *node.ServiceContext, cfg *node.Config, err error) {
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("%s", r)
			ctx = nil
			cfg = nil
		}
	}()
	cfg = &node.Config{
		P2P: p2p.Config{
			PrivateKey: key,
		},
	}
	ctx = &node.ServiceContext{
		EventMux: new(event.TypeMux),
	}
	// config is a private field so we need a reflect/unsafe workaround to set the value
	configField := reflect.ValueOf(ctx).Elem().FieldByName("config")
	configField = reflect.NewAt(configField.Type(), unsafe.Pointer(configField.UnsafeAddr())).Elem()
	configField.Set(reflect.ValueOf(cfg))
	return
}

func startRaftNode(id, port uint16, tmpWorkingDir string, key *ecdsa.PrivateKey, nodes []*enode.Node) (*RaftService, error) {
	datadir := fmt.Sprintf("%s/node%d", tmpWorkingDir, id)

	ctx, _, err := prepareServiceContext(key)
	if err != nil {
		return nil, err
	}

	e, err := eth.New(ctx, &eth.Config{
		Genesis: &core.Genesis{Config: params.QuorumTestChainConfig},
	})
	if err != nil {
		return nil, err
	}

	s, err := New(ctx, params.QuorumTestChainConfig, id, port, false, 100*time.Millisecond, e, nodes, datadir, false)
	if err != nil {
		return nil, err
	}

	srv := &p2p.Server{
		Config: p2p.Config{
			PrivateKey: key,
		},
	}
	if err := srv.Start(); err != nil {
		return nil, fmt.Errorf("could not start: %v", err)
	}
	if err := s.Start(srv); err != nil {
		return nil, err
	}

	return s, nil
}