github.com/kisexp/xdchain@v0.0.0-20211206025815-490d6b732aa7/raft/handler_test.go

package raft

import (
	"crypto/ecdsa"
	"encoding/binary"
	"fmt"
	"io/ioutil"
	"net"
	"os"
	"testing"
	"time"

	"github.com/coreos/etcd/wal"
	"github.com/coreos/etcd/wal/walpb"
	"github.com/kisexp/xdchain/core"
	"github.com/kisexp/xdchain/crypto"
	"github.com/kisexp/xdchain/eth"
	"github.com/kisexp/xdchain/log"
	"github.com/kisexp/xdchain/node"
	"github.com/kisexp/xdchain/p2p"
	"github.com/kisexp/xdchain/p2p/enode"
	"github.com/kisexp/xdchain/params"
)

// pm.advanceAppliedIndex() and state updates run in different transaction
// boundaries, so there is a probability that they end up out of sync after a
// premature shutdown.
func TestProtocolManager_whenAppliedIndexOutOfSync(t *testing.T) {
	logger := log.New()
	logger.SetHandler(log.StreamHandler(os.Stdout, log.TerminalFormat(false)))
	tmpWorkingDir, err := ioutil.TempDir("", "")
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		_ = os.RemoveAll(tmpWorkingDir)
	}()
	count := 3
	ports := make([]uint16, count)
	nodeKeys := make([]*ecdsa.PrivateKey, count)
	peers := make([]*enode.Node, count)
	for i := 0; i < count; i++ {
		ports[i] = nextPort(t)
		nodeKeys[i] = mustNewNodeKey(t)
		peers[i] = enode.NewV4Hostname(&(nodeKeys[i].PublicKey), net.IPv4(127, 0, 0, 1).String(), 0, 0, int(ports[i]))
	}
	raftNodes := make([]*RaftService, count)
	for i := 0; i < count; i++ {
		if s, err := startRaftNode(uint16(i+1), ports[i], tmpWorkingDir, nodeKeys[i], peers); err != nil {
			t.Fatal(err)
		} else {
			raftNodes[i] = s
		}
	}
	// block until one of the nodes has been elected minter
	waitFunc := func() {
		for {
			time.Sleep(10 * time.Millisecond)
			for i := 0; i < count; i++ {
				if raftNodes[i].raftProtocolManager.role == minterRole {
					return
				}
			}
		}
	}
	waitFunc()
	logger.Debug("stop the cluster")
	for i := 0; i < count; i++ {
		if err := raftNodes[i].Stop(); err != nil {
			t.Fatal(err)
		}
		// the WAL dir may still be locked after Stop(), which would make the
		// subsequent restart fail, so wait here until it is fully released
		for isWalDirStillLocked(fmt.Sprintf("%s/node%d/raft-wal", tmpWorkingDir, i+1)) {
			logger.Debug("sleep...", "i", i)
			time.Sleep(10 * time.Millisecond)
		}
		logger.Debug("node stopped", "id", i)
	}
	logger.Debug("update applied index")
	// set the applied index of node 0 behind its WAL to mimic the issue
	if err := writeAppliedIndex(tmpWorkingDir, 0, 1); err != nil {
		t.Fatal(err)
	}
	logger.Debug("restart the cluster")
	for i := 0; i < count; i++ {
		if s, err := startRaftNode(uint16(i+1), ports[i], tmpWorkingDir, nodeKeys[i], peers); err != nil {
			t.Fatal(err)
		} else {
			raftNodes[i] = s
		}
	}
	waitFunc()
}

// isWalDirStillLocked reports whether the WAL directory cannot be re-opened
// yet, i.e. it is still locked by the previous instance.
func isWalDirStillLocked(walDir string) bool {
	var snap walpb.Snapshot
	w, err := wal.Open(walDir, snap)
	if err != nil {
		return true
	}
	defer func() {
		_ = w.Close()
	}()
	return false
}

// writeAppliedIndex overwrites the persisted applied index of the given node,
// simulating an applied index that lags behind the WAL.
func writeAppliedIndex(workingDir string, node int, index uint64) error {
	db, err := openQuorumRaftDb(fmt.Sprintf("%s/node%d/quorum-raft-state", workingDir, node+1))
	if err != nil {
		return err
	}
	defer func() {
		_ = db.Close()
	}()
	buf := make([]byte, 8)
	binary.LittleEndian.PutUint64(buf, index)
	return db.Put(appliedDbKey, buf, noFsync)
}

// mustNewNodeKey generates a fresh node key, failing the test on error.
func mustNewNodeKey(t *testing.T) *ecdsa.PrivateKey {
	k, err := crypto.GenerateKey()
	if err != nil {
		t.Fatal(err)
	}
	return k
}

// nextPort asks the OS for a free TCP port for the raft transport.
func nextPort(t *testing.T) uint16 {
	listener, err := net.Listen("tcp", ":0")
	if err != nil {
		t.Fatal(err)
	}
	port := uint16(listener.Addr().(*net.TCPAddr).Port)
	// release the port right away so the raft node under test can bind it
	if err := listener.Close(); err != nil {
		t.Fatal(err)
	}
	return port
}

// prepareServiceContext builds a minimal node stack around the given key;
// panics raised during node construction are converted into errors.
func prepareServiceContext(key *ecdsa.PrivateKey) (stack *node.Node, cfg *node.Config, err error) {
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("%s", r)
			stack = nil
			cfg = nil
		}
	}()
	cfg = &node.Config{
		P2P: p2p.Config{
			PrivateKey: key,
		},
	}
	stack, err = node.New(cfg)
	return
}

// startRaftNode wires an eth service and a raft service into a fresh node
// stack and starts both.
func startRaftNode(id, port uint16, tmpWorkingDir string, key *ecdsa.PrivateKey, nodes []*enode.Node) (*RaftService, error) {
	raftlogdir := fmt.Sprintf("%s/node%d", tmpWorkingDir, id)

	stack, _, err := prepareServiceContext(key)
	if err != nil {
		return nil, err
	}

	e, err := eth.New(stack, &eth.Config{
		Genesis: &core.Genesis{Config: params.QuorumTestChainConfig},
	})
	if err != nil {
		return nil, err
	}

	s, err := New(stack, params.QuorumTestChainConfig, id, port, false, 100*time.Millisecond, e, nodes, raftlogdir, false)
	if err != nil {
		return nil, err
	}

	if err := stack.Server().Start(); err != nil {
		return nil, fmt.Errorf("could not start: %v", err)
	}
	if err := s.Start(); err != nil {
		return nil, err
	}

	return s, nil
}