github.com/koko1123/flow-go-1@v0.29.6/network/test/epochtransition_test.go (about) 1 package test 2 3 import ( 4 "context" 5 "fmt" 6 "math/rand" 7 "os" 8 "reflect" 9 "runtime" 10 "sync" 11 "testing" 12 "time" 13 14 "github.com/ipfs/go-log" 15 "github.com/rs/zerolog" 16 "github.com/stretchr/testify/assert" 17 "github.com/stretchr/testify/mock" 18 "github.com/stretchr/testify/require" 19 "github.com/stretchr/testify/suite" 20 21 "github.com/koko1123/flow-go-1/model/flow" 22 "github.com/koko1123/flow-go-1/model/flow/filter" 23 "github.com/koko1123/flow-go-1/model/libp2p/message" 24 "github.com/koko1123/flow-go-1/module/irrecoverable" 25 "github.com/koko1123/flow-go-1/network" 26 "github.com/koko1123/flow-go-1/network/internal/testutils" 27 "github.com/koko1123/flow-go-1/network/mocknetwork" 28 mockprotocol "github.com/koko1123/flow-go-1/state/protocol/mock" 29 "github.com/koko1123/flow-go-1/utils/unittest" 30 ) 31 32 // MutableIdentityTableSuite tests that the networking layer responds correctly 33 // to changes to the identity table. When nodes are added, we should update our 34 // topology and accept connections from these new nodes. When nodes are removed 35 // or ejected we should update our topology and restrict connections from these 36 // nodes. 37 type MutableIdentityTableSuite struct { 38 suite.Suite 39 testutils.ConduitWrapper 40 testNodes testNodeList 41 removedTestNodes testNodeList // test nodes which might have been removed from the mesh 42 state *mockprotocol.State 43 snapshot *mockprotocol.Snapshot 44 logger zerolog.Logger 45 cancels []context.CancelFunc 46 } 47 48 // testNode encapsulates the node state which includes its identity, middleware, network, 49 // mesh engine and the id refresher 50 type testNode struct { 51 id *flow.Identity 52 mw network.Middleware 53 net network.Network 54 engine *testutils.MeshEngine 55 } 56 57 // testNodeList encapsulates a list of test node and 58 // has functions to retrieve the different elements of the test nodes in a concurrency safe manner 59 type testNodeList struct { 60 sync.RWMutex 61 nodes []testNode 62 } 63 64 func newTestNodeList() testNodeList { 65 return testNodeList{} 66 } 67 68 func (t *testNodeList) append(node testNode) { 69 t.Lock() 70 defer t.Unlock() 71 t.nodes = append(t.nodes, node) 72 } 73 74 func (t *testNodeList) remove() testNode { 75 t.Lock() 76 defer t.Unlock() 77 // choose a random node to remove 78 i := rand.Intn(len(t.nodes)) 79 removedNode := t.nodes[i] 80 t.nodes = append(t.nodes[:i], t.nodes[i+1:]...) 81 return removedNode 82 } 83 84 func (t *testNodeList) ids() flow.IdentityList { 85 t.RLock() 86 defer t.RUnlock() 87 ids := make(flow.IdentityList, len(t.nodes)) 88 for i, node := range t.nodes { 89 ids[i] = node.id 90 } 91 return ids 92 } 93 94 func (t *testNodeList) lastAdded() (testNode, error) { 95 t.RLock() 96 defer t.RUnlock() 97 if len(t.nodes) > 0 { 98 return t.nodes[len(t.nodes)-1], nil 99 } 100 return testNode{}, fmt.Errorf("node list empty") 101 } 102 103 func (t *testNodeList) engines() []*testutils.MeshEngine { 104 t.RLock() 105 defer t.RUnlock() 106 engs := make([]*testutils.MeshEngine, len(t.nodes)) 107 for i, node := range t.nodes { 108 engs[i] = node.engine 109 } 110 return engs 111 } 112 113 func (t *testNodeList) networks() []network.Network { 114 t.RLock() 115 defer t.RUnlock() 116 nets := make([]network.Network, len(t.nodes)) 117 for i, node := range t.nodes { 118 nets[i] = node.net 119 } 120 return nets 121 } 122 123 func TestMutableIdentityTable(t *testing.T) { 124 unittest.SkipUnless(t, unittest.TEST_TODO, "broken test") 125 suite.Run(t, new(MutableIdentityTableSuite)) 126 } 127 128 // signalIdentityChanged update IDs for all the current set of nodes (simulating an epoch) 129 func (suite *MutableIdentityTableSuite) signalIdentityChanged() { 130 for _, n := range suite.testNodes.nodes { 131 n.mw.UpdateNodeAddresses() 132 } 133 } 134 135 func (suite *MutableIdentityTableSuite) SetupTest() { 136 suite.testNodes = newTestNodeList() 137 suite.removedTestNodes = newTestNodeList() 138 rand.Seed(time.Now().UnixNano()) 139 nodeCount := 10 140 suite.logger = zerolog.New(os.Stderr).Level(zerolog.ErrorLevel) 141 log.SetAllLoggers(log.LevelError) 142 143 suite.setupStateMock() 144 suite.addNodes(nodeCount) 145 146 // simulate a start of an epoch by signaling a change in the identity table 147 suite.signalIdentityChanged() 148 149 // wait for two lip2p heatbeats for the nodes to discover each other and form the mesh 150 time.Sleep(2 * time.Second) 151 } 152 153 // TearDownTest closes all the networks within a specified timeout 154 func (suite *MutableIdentityTableSuite) TearDownTest() { 155 for _, cancel := range suite.cancels { 156 cancel() 157 } 158 networks := append(suite.testNodes.networks(), suite.removedTestNodes.networks()...) 159 testutils.StopComponents(suite.T(), networks, 3*time.Second) 160 } 161 162 // setupStateMock setup state related mocks (all networks share the same state mock) 163 func (suite *MutableIdentityTableSuite) setupStateMock() { 164 final := unittest.BlockHeaderFixture() 165 suite.state = new(mockprotocol.State) 166 suite.snapshot = new(mockprotocol.Snapshot) 167 suite.snapshot.On("Head").Return(&final, nil) 168 suite.snapshot.On("Phase").Return(flow.EpochPhaseCommitted, nil) 169 // return all the current list of ids for the state.Final.Identities call made by the network 170 suite.snapshot.On("Identities", mock.Anything).Return( 171 func(flow.IdentityFilter) flow.IdentityList { 172 return suite.testNodes.ids() 173 }, 174 func(flow.IdentityFilter) error { return nil }) 175 suite.state.On("Final").Return(suite.snapshot, nil) 176 } 177 178 // addNodes creates count many new nodes and appends them to the suite state variables 179 func (suite *MutableIdentityTableSuite) addNodes(count int) { 180 ctx, cancel := context.WithCancel(context.Background()) 181 signalerCtx := irrecoverable.NewMockSignalerContext(suite.T(), ctx) 182 183 // create the ids, middlewares and networks 184 ids, nodes, mws, nets, _ := testutils.GenerateIDsMiddlewaresNetworks( 185 suite.T(), 186 count, 187 suite.logger, 188 unittest.NetworkCodec(), 189 mocknetwork.NewViolationsConsumer(suite.T()), 190 ) 191 suite.cancels = append(suite.cancels, cancel) 192 193 testutils.StartNodesAndNetworks(signalerCtx, suite.T(), nodes, nets, 100*time.Millisecond) 194 195 // create the engines for the new nodes 196 engines := testutils.GenerateEngines(suite.T(), nets) 197 198 // create the test engines 199 for i := 0; i < count; i++ { 200 node := testNode{ 201 id: ids[i], 202 mw: mws[i], 203 net: nets[i], 204 engine: engines[i], 205 } 206 suite.testNodes.append(node) 207 } 208 } 209 210 // removeNode removes a randomly chosen test node from suite.testNodes and adds it to suite.removedTestNodes 211 func (suite *MutableIdentityTableSuite) removeNode() testNode { 212 removedNode := suite.testNodes.remove() 213 suite.removedTestNodes.append(removedNode) 214 return removedNode 215 } 216 217 // TestNewNodeAdded tests that when a new node is added to the identity list e.g. on an epoch, 218 // then it can connect to the network. 219 func (suite *MutableIdentityTableSuite) TestNewNodeAdded() { 220 221 // add a new node the current list of nodes 222 suite.addNodes(1) 223 224 newNode, err := suite.testNodes.lastAdded() 225 require.NoError(suite.T(), err) 226 newID := newNode.id 227 newMiddleware := newNode.mw 228 229 suite.logger.Debug(). 230 Str("new_node", newID.NodeID.String()). 231 Msg("added one node") 232 233 // update IDs for all the networks (simulating an epoch) 234 suite.signalIdentityChanged() 235 236 ids := suite.testNodes.ids() 237 engs := suite.testNodes.engines() 238 239 // check if the new node has sufficient connections with the existing nodes 240 // if it does, then it has been inducted successfully in the network 241 suite.assertConnected(newMiddleware, ids.Filter(filter.Not(filter.HasNodeID(newID.NodeID)))) 242 243 // check that all the engines on this new epoch can talk to each other using any of the three networking primitives 244 suite.assertNetworkPrimitives(ids, engs, nil, nil) 245 } 246 247 // TestNodeRemoved tests that when an existing node is removed from the identity 248 // list (ie. as a result of an ejection or transition into an epoch where that node 249 // has un-staked) then it cannot connect to the network. 250 func (suite *MutableIdentityTableSuite) TestNodeRemoved() { 251 252 // removed a node 253 removedNode := suite.removeNode() 254 removedID := removedNode.id 255 removedMiddleware := removedNode.mw 256 removedEngine := removedNode.engine 257 258 // update IDs for all the remaining nodes 259 // the removed node continues with the old identity list as we don't want to rely on it updating its ids list 260 suite.signalIdentityChanged() 261 262 remainingIDs := suite.testNodes.ids() 263 remainingEngs := suite.testNodes.engines() 264 265 // assert that the removed node has no connections with any of the other nodes 266 suite.assertDisconnected(removedMiddleware, remainingIDs) 267 268 // check that all remaining engines can still talk to each other while the ones removed can't 269 // using any of the three networking primitives 270 removedIDs := []*flow.Identity{removedID} 271 removedEngines := []*testutils.MeshEngine{removedEngine} 272 273 // assert that all three network primitives still work 274 suite.assertNetworkPrimitives(remainingIDs, remainingEngs, removedIDs, removedEngines) 275 } 276 277 // TestNodesAddedAndRemoved tests that: 278 // a. a newly added node can exchange messages with the existing nodes 279 // b. a node that has has been removed cannot exchange messages with the existing nodes 280 func (suite *MutableIdentityTableSuite) TestNodesAddedAndRemoved() { 281 282 // remove a node 283 removedNode := suite.removeNode() 284 removedID := removedNode.id 285 removedMiddleware := removedNode.mw 286 removedEngine := removedNode.engine 287 288 // add a node 289 suite.addNodes(1) 290 newNode, err := suite.testNodes.lastAdded() 291 require.NoError(suite.T(), err) 292 newID := newNode.id 293 newMiddleware := newNode.mw 294 295 // update all current nodes 296 suite.signalIdentityChanged() 297 298 remainingIDs := suite.testNodes.ids() 299 remainingEngs := suite.testNodes.engines() 300 301 // check if the new node has sufficient connections with the existing nodes 302 suite.assertConnected(newMiddleware, remainingIDs.Filter(filter.Not(filter.HasNodeID(newID.NodeID)))) 303 304 // assert that the removed node has no connections with any of the other nodes 305 suite.assertDisconnected(removedMiddleware, remainingIDs) 306 307 // check that all remaining engines can still talk to each other while the ones removed can't 308 // using any of the three networking primitives 309 removedIDs := []*flow.Identity{removedID} 310 removedEngines := []*testutils.MeshEngine{removedEngine} 311 312 // assert that all three network primitives still work 313 suite.assertNetworkPrimitives(remainingIDs, remainingEngs, removedIDs, removedEngines) 314 } 315 316 // assertConnected checks that the middleware of a node is directly connected 317 // to at least half of the other nodes. 318 func (suite *MutableIdentityTableSuite) assertConnected(mw network.Middleware, ids flow.IdentityList) { 319 t := suite.T() 320 threshold := len(ids) / 2 321 require.Eventuallyf(t, func() bool { 322 connections := 0 323 for _, id := range ids { 324 connected, err := mw.IsConnected(id.NodeID) 325 require.NoError(t, err) 326 if connected { 327 connections++ 328 } 329 } 330 suite.logger.Debug(). 331 Int("threshold", threshold). 332 Int("connections", connections). 333 Msg("current connection count") 334 return connections >= threshold 335 }, 5*time.Second, 100*time.Millisecond, "node is not connected to enough nodes") 336 } 337 338 // assertDisconnected checks that the middleware of a node is not connected to any of the other nodes specified in the 339 // ids list 340 func (suite *MutableIdentityTableSuite) assertDisconnected(mw network.Middleware, ids flow.IdentityList) { 341 t := suite.T() 342 require.Eventuallyf(t, func() bool { 343 for _, id := range ids { 344 connected, err := mw.IsConnected(id.NodeID) 345 require.NoError(t, err) 346 if connected { 347 return false 348 } 349 } 350 return true 351 }, 5*time.Second, 100*time.Millisecond, "node is still connected") 352 } 353 354 // assertNetworkPrimitives asserts that allowed engines can exchange messages between themselves but not with the 355 // disallowed engines using each of the three network primitives 356 func (suite *MutableIdentityTableSuite) assertNetworkPrimitives( 357 allowedIDs flow.IdentityList, 358 allowedEngs []*testutils.MeshEngine, 359 disallowedIDs flow.IdentityList, 360 disallowedEngs []*testutils.MeshEngine) { 361 suite.Run("Publish", func() { 362 suite.exchangeMessages(allowedIDs, allowedEngs, disallowedIDs, disallowedEngs, suite.Publish, false) 363 }) 364 suite.Run("Multicast", func() { 365 suite.exchangeMessages(allowedIDs, allowedEngs, disallowedIDs, disallowedEngs, suite.Multicast, false) 366 }) 367 suite.Run("Unicast", func() { 368 // unicast send from or to a node that has been evicted should fail with an error 369 suite.exchangeMessages(allowedIDs, allowedEngs, disallowedIDs, disallowedEngs, suite.Unicast, true) 370 }) 371 } 372 373 // exchangeMessages verifies that allowed engines can successfully exchange messages between them while disallowed 374 // engines can't using the ConduitSendWrapperFunc network primitive 375 func (suite *MutableIdentityTableSuite) exchangeMessages( 376 allowedIDs flow.IdentityList, 377 allowedEngs []*testutils.MeshEngine, 378 disallowedIDs flow.IdentityList, 379 disallowedEngs []*testutils.MeshEngine, 380 send testutils.ConduitSendWrapperFunc, 381 expectSendErrorForDisallowedIDs bool) { 382 383 // send a message from each of the allowed engine to the other allowed engines 384 for i, allowedEng := range allowedEngs { 385 386 fromID := allowedIDs[i].NodeID 387 targetIDs := allowedIDs.Filter(filter.Not(filter.HasNodeID(allowedIDs[i].NodeID))) 388 389 err := suite.sendMessage(fromID, allowedEng, targetIDs, send) 390 require.NoError(suite.T(), err) 391 } 392 393 // send a message from each of the allowed engine to all of the disallowed engines 394 if len(disallowedEngs) > 0 { 395 for i, fromEng := range allowedEngs { 396 397 fromID := allowedIDs[i].NodeID 398 targetIDs := disallowedIDs 399 400 err := suite.sendMessage(fromID, fromEng, targetIDs, send) 401 if expectSendErrorForDisallowedIDs { 402 require.Error(suite.T(), err) 403 } 404 } 405 } 406 407 // send a message from each of the disallowed engine to each of the allowed engines 408 for i, fromEng := range disallowedEngs { 409 410 fromID := disallowedIDs[i].NodeID 411 targetIDs := allowedIDs 412 413 err := suite.sendMessage(fromID, fromEng, targetIDs, send) 414 if expectSendErrorForDisallowedIDs { 415 require.Error(suite.T(), err) 416 } 417 } 418 419 count := len(allowedEngs) 420 expectedMsgCnt := count - 1 421 wg := sync.WaitGroup{} 422 // fires a goroutine for each of the allowed engine to listen for incoming messages 423 for i := range allowedEngs { 424 wg.Add(expectedMsgCnt) 425 go func(e *testutils.MeshEngine) { 426 for x := 0; x < expectedMsgCnt; x++ { 427 <-e.Received 428 wg.Done() 429 } 430 }(allowedEngs[i]) 431 } 432 433 // assert that all allowed engines received expectedMsgCnt number of messages 434 unittest.AssertReturnsBefore(suite.T(), wg.Wait, 5*time.Second) 435 // assert that all allowed engines received no other messages 436 for i := range allowedEngs { 437 assert.Empty(suite.T(), allowedEngs[i].Received) 438 } 439 440 // assert that the disallowed engines didn't receive any message 441 for i, eng := range disallowedEngs { 442 unittest.RequireNeverClosedWithin(suite.T(), eng.Received, time.Millisecond, 443 fmt.Sprintf("%s engine should not have recevied message", disallowedIDs[i])) 444 } 445 } 446 447 func (suite *MutableIdentityTableSuite) sendMessage(fromID flow.Identifier, 448 fromEngine *testutils.MeshEngine, 449 toIDs flow.IdentityList, 450 send testutils.ConduitSendWrapperFunc) error { 451 452 primitive := runtime.FuncForPC(reflect.ValueOf(send).Pointer()).Name() 453 event := &message.TestMessage{ 454 Text: fmt.Sprintf("hello from node %s using %s", fromID.String(), primitive), 455 } 456 457 return send(event, fromEngine.Con, toIDs.NodeIDs()...) 458 }