github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/network/test/cohort2/epochtransition_test.go (about) 1 package cohort2 2 3 import ( 4 "context" 5 "fmt" 6 "math/rand" 7 "os" 8 "reflect" 9 "runtime" 10 "sync" 11 "testing" 12 "time" 13 14 "github.com/ipfs/go-log" 15 "github.com/rs/zerolog" 16 "github.com/stretchr/testify/assert" 17 "github.com/stretchr/testify/mock" 18 "github.com/stretchr/testify/require" 19 "github.com/stretchr/testify/suite" 20 21 "github.com/onflow/flow-go/model/flow" 22 "github.com/onflow/flow-go/model/flow/filter" 23 "github.com/onflow/flow-go/model/libp2p/message" 24 "github.com/onflow/flow-go/module/irrecoverable" 25 "github.com/onflow/flow-go/network" 26 "github.com/onflow/flow-go/network/channels" 27 "github.com/onflow/flow-go/network/internal/testutils" 28 "github.com/onflow/flow-go/network/p2p" 29 "github.com/onflow/flow-go/network/underlay" 30 mockprotocol "github.com/onflow/flow-go/state/protocol/mock" 31 "github.com/onflow/flow-go/utils/unittest" 32 ) 33 34 // MutableIdentityTableSuite tests that the networking layer responds correctly 35 // to changes to the identity table. When nodes are added, we should update our 36 // topology and accept connections from these new nodes. When nodes are removed 37 // or ejected we should update our topology and restrict connections from these 38 // nodes. 39 type MutableIdentityTableSuite struct { 40 suite.Suite 41 testutils.ConduitWrapper 42 testNodes testNodeList 43 removedTestNodes testNodeList // test nodes which might have been removed from the mesh 44 state *mockprotocol.State 45 snapshot *mockprotocol.Snapshot 46 logger zerolog.Logger 47 cancels []context.CancelFunc 48 } 49 50 // testNode encapsulates the node state which includes its identity, libp2p node, network, 51 // mesh engine and the id refresher 52 type testNode struct { 53 id *flow.Identity 54 libp2pNode p2p.LibP2PNode 55 network *underlay.Network 56 engine *testutils.MeshEngine 57 } 58 59 // testNodeList encapsulates a list of test node and 60 // has functions to retrieve the different elements of the test nodes in a concurrency safe manner 61 type testNodeList struct { 62 sync.RWMutex 63 nodes []testNode 64 } 65 66 func newTestNodeList() testNodeList { 67 return testNodeList{} 68 } 69 70 func (t *testNodeList) append(node testNode) { 71 t.Lock() 72 defer t.Unlock() 73 t.nodes = append(t.nodes, node) 74 } 75 76 func (t *testNodeList) remove() testNode { 77 t.Lock() 78 defer t.Unlock() 79 // choose a random node to remove 80 i := rand.Intn(len(t.nodes)) 81 removedNode := t.nodes[i] 82 t.nodes = append(t.nodes[:i], t.nodes[i+1:]...) 83 return removedNode 84 } 85 86 func (t *testNodeList) ids() flow.IdentityList { 87 t.RLock() 88 defer t.RUnlock() 89 ids := make(flow.IdentityList, len(t.nodes)) 90 for i, node := range t.nodes { 91 ids[i] = node.id 92 } 93 return ids 94 } 95 96 func (t *testNodeList) lastAdded() (testNode, error) { 97 t.RLock() 98 defer t.RUnlock() 99 if len(t.nodes) > 0 { 100 return t.nodes[len(t.nodes)-1], nil 101 } 102 return testNode{}, fmt.Errorf("node list empty") 103 } 104 105 func (t *testNodeList) engines() []*testutils.MeshEngine { 106 t.RLock() 107 defer t.RUnlock() 108 engs := make([]*testutils.MeshEngine, len(t.nodes)) 109 for i, node := range t.nodes { 110 engs[i] = node.engine 111 } 112 return engs 113 } 114 115 func (t *testNodeList) networks() []network.EngineRegistry { 116 t.RLock() 117 defer t.RUnlock() 118 nets := make([]network.EngineRegistry, len(t.nodes)) 119 for i, node := range t.nodes { 120 nets[i] = node.network 121 } 122 return nets 123 } 124 125 func (t *testNodeList) libp2pNodes() []p2p.LibP2PNode { 126 t.RLock() 127 defer t.RUnlock() 128 nodes := make([]p2p.LibP2PNode, len(t.nodes)) 129 for i, node := range t.nodes { 130 nodes[i] = node.libp2pNode 131 } 132 return nodes 133 } 134 135 func TestMutableIdentityTable(t *testing.T) { 136 unittest.SkipUnless(t, unittest.TEST_TODO, "broken test") 137 suite.Run(t, new(MutableIdentityTableSuite)) 138 } 139 140 // signalIdentityChanged update IDs for all the current set of nodes (simulating an epoch) 141 func (suite *MutableIdentityTableSuite) signalIdentityChanged() { 142 for _, n := range suite.testNodes.nodes { 143 n.network.UpdateNodeAddresses() 144 } 145 } 146 147 func (suite *MutableIdentityTableSuite) SetupTest() { 148 suite.testNodes = newTestNodeList() 149 suite.removedTestNodes = newTestNodeList() 150 151 nodeCount := 10 152 suite.logger = zerolog.New(os.Stderr).Level(zerolog.ErrorLevel) 153 log.SetAllLoggers(log.LevelError) 154 155 suite.setupStateMock() 156 suite.addNodes(nodeCount) 157 158 // simulate a start of an epoch by signaling a change in the identity table 159 suite.signalIdentityChanged() 160 161 // wait for two lip2p heatbeats for the nodes to discover each other and form the mesh 162 time.Sleep(2 * time.Second) 163 } 164 165 // TearDownTest closes all the networks within a specified timeout 166 func (suite *MutableIdentityTableSuite) TearDownTest() { 167 for _, cancel := range suite.cancels { 168 cancel() 169 } 170 networks := append(suite.testNodes.networks(), suite.removedTestNodes.networks()...) 171 testutils.StopComponents(suite.T(), networks, 3*time.Second) 172 } 173 174 // setupStateMock setup state related mocks (all networks share the same state mock) 175 func (suite *MutableIdentityTableSuite) setupStateMock() { 176 final := unittest.BlockHeaderFixture() 177 suite.state = new(mockprotocol.State) 178 suite.snapshot = new(mockprotocol.Snapshot) 179 suite.snapshot.On("Head").Return(&final, nil) 180 suite.snapshot.On("Phase").Return(flow.EpochPhaseCommitted, nil) 181 // return all the current list of ids for the state.Final.Identities call made by the network 182 suite.snapshot.On("Identities", mock.Anything).Return( 183 func(flow.IdentityFilter[flow.Identity]) flow.IdentityList { 184 return suite.testNodes.ids() 185 }, 186 func(flow.IdentityFilter[flow.Identity]) error { return nil }) 187 suite.state.On("Final").Return(suite.snapshot, nil) 188 } 189 190 // addNodes creates count many new nodes and appends them to the suite state variables 191 func (suite *MutableIdentityTableSuite) addNodes(count int) { 192 ctx, cancel := context.WithCancel(context.Background()) 193 signalerCtx := irrecoverable.NewMockSignalerContext(suite.T(), ctx) 194 sporkId := unittest.IdentifierFixture() 195 ids, nodes := testutils.LibP2PNodeForNetworkFixture(suite.T(), sporkId, count) 196 nets, _ := testutils.NetworksFixture(suite.T(), sporkId, ids, nodes) 197 suite.cancels = append(suite.cancels, cancel) 198 199 // starts the nodes and networks 200 testutils.StartNodes(signalerCtx, suite.T(), nodes) 201 for _, net := range nets { 202 testutils.StartNetworks(signalerCtx, suite.T(), []network.EngineRegistry{net}) 203 unittest.RequireComponentsReadyBefore(suite.T(), 1*time.Second, net) 204 } 205 206 // create the engines for the new nodes 207 engines := make([]*testutils.MeshEngine, count) 208 for i, n := range nets { 209 eng := testutils.NewMeshEngine(suite.T(), n, 100, channels.TestNetworkChannel) 210 engines[i] = eng 211 } 212 213 // create the test engines 214 for i := 0; i < count; i++ { 215 node := testNode{ 216 id: ids[i], 217 libp2pNode: nodes[i], 218 network: nets[i], 219 engine: engines[i], 220 } 221 suite.testNodes.append(node) 222 } 223 } 224 225 // removeNode removes a randomly chosen test node from suite.testNodes and adds it to suite.removedTestNodes 226 func (suite *MutableIdentityTableSuite) removeNode() testNode { 227 removedNode := suite.testNodes.remove() 228 suite.removedTestNodes.append(removedNode) 229 return removedNode 230 } 231 232 // TestNewNodeAdded tests that when a new node is added to the identity list e.g. on an epoch, 233 // then it can connect to the network. 234 func (suite *MutableIdentityTableSuite) TestNewNodeAdded() { 235 236 // add a new node the current list of nodes 237 suite.addNodes(1) 238 239 newNode, err := suite.testNodes.lastAdded() 240 require.NoError(suite.T(), err) 241 newID := newNode.id 242 243 suite.logger.Debug(). 244 Str("new_node", newID.NodeID.String()). 245 Msg("added one node") 246 247 // update IDs for all the networks (simulating an epoch) 248 suite.signalIdentityChanged() 249 250 ids := suite.testNodes.ids() 251 engs := suite.testNodes.engines() 252 253 // check if the new node has sufficient connections with the existing nodes 254 // if it does, then it has been inducted successfully in the network 255 suite.assertConnected(newNode.libp2pNode, suite.testNodes.libp2pNodes()) 256 257 // check that all the engines on this new epoch can talk to each other using any of the three networking primitives 258 suite.assertNetworkPrimitives(ids, engs, nil, nil) 259 } 260 261 // TestNodeRemoved tests that when an existing node is removed from the identity 262 // list (ie. as a result of an ejection or transition into an epoch where that node 263 // has un-staked) then it cannot connect to the network. 264 func (suite *MutableIdentityTableSuite) TestNodeRemoved() { 265 // removed a node 266 removedNode := suite.removeNode() 267 removedID := removedNode.id 268 removedEngine := removedNode.engine 269 270 // update IDs for all the remaining nodes 271 // the removed node continues with the old identity list as we don't want to rely on it updating its ids list 272 suite.signalIdentityChanged() 273 274 remainingIDs := suite.testNodes.ids() 275 remainingEngs := suite.testNodes.engines() 276 277 // assert that the removed node has no connections with any of the other nodes 278 suite.assertDisconnected(removedNode.libp2pNode, suite.testNodes.libp2pNodes()) 279 280 // check that all remaining engines can still talk to each other while the ones removed can't 281 // using any of the three networking primitives 282 removedIDs := []*flow.Identity{removedID} 283 removedEngines := []*testutils.MeshEngine{removedEngine} 284 285 // assert that all three network primitives still work 286 suite.assertNetworkPrimitives(remainingIDs, remainingEngs, removedIDs, removedEngines) 287 } 288 289 // TestNodesAddedAndRemoved tests that: 290 // a. a newly added node can exchange messages with the existing nodes 291 // b. a node that has has been removed cannot exchange messages with the existing nodes 292 func (suite *MutableIdentityTableSuite) TestNodesAddedAndRemoved() { 293 294 // remove a node 295 removedNode := suite.removeNode() 296 removedID := removedNode.id 297 removedEngine := removedNode.engine 298 299 // add a node 300 suite.addNodes(1) 301 newNode, err := suite.testNodes.lastAdded() 302 require.NoError(suite.T(), err) 303 304 // update all current nodes 305 suite.signalIdentityChanged() 306 307 remainingIDs := suite.testNodes.ids() 308 remainingEngs := suite.testNodes.engines() 309 310 // check if the new node has sufficient connections with the existing nodes 311 suite.assertConnected(newNode.libp2pNode, suite.testNodes.libp2pNodes()) 312 313 // assert that the removed node has no connections with any of the other nodes 314 suite.assertDisconnected(removedNode.libp2pNode, suite.testNodes.libp2pNodes()) 315 316 // check that all remaining engines can still talk to each other while the ones removed can't 317 // using any of the three networking primitives 318 removedIDs := []*flow.Identity{removedID} 319 removedEngines := []*testutils.MeshEngine{removedEngine} 320 321 // assert that all three network primitives still work 322 suite.assertNetworkPrimitives(remainingIDs, remainingEngs, removedIDs, removedEngines) 323 } 324 325 // assertConnected checks that a libp2p node is directly connected 326 // to at least half of the other nodes. 327 func (suite *MutableIdentityTableSuite) assertConnected(thisNode p2p.LibP2PNode, allNodes []p2p.LibP2PNode) { 328 t := suite.T() 329 threshold := len(allNodes) / 2 330 require.Eventuallyf(t, func() bool { 331 connections := 0 332 for _, node := range allNodes { 333 if node == thisNode { 334 // we don't want to check if a node is connected to itself 335 continue 336 } 337 connected, err := thisNode.IsConnected(node.ID()) 338 require.NoError(t, err) 339 if connected { 340 connections++ 341 } 342 } 343 suite.logger.Debug(). 344 Int("threshold", threshold). 345 Int("connections", connections). 346 Msg("current connection count") 347 return connections >= threshold 348 }, 5*time.Second, 100*time.Millisecond, "node is not connected to enough nodes") 349 } 350 351 // assertDisconnected checks that a libp2p node is not connected to any of the other nodes specified in the 352 // ids list. 353 func (suite *MutableIdentityTableSuite) assertDisconnected(thisNode p2p.LibP2PNode, allNodes []p2p.LibP2PNode) { 354 t := suite.T() 355 require.Eventuallyf(t, func() bool { 356 for _, node := range allNodes { 357 connected, err := thisNode.IsConnected(node.ID()) 358 require.NoError(t, err) 359 if connected { 360 return false 361 } 362 } 363 return true 364 }, 5*time.Second, 100*time.Millisecond, "node is still connected") 365 } 366 367 // assertNetworkPrimitives asserts that allowed engines can exchange messages between themselves but not with the 368 // disallowed engines using each of the three network primitives 369 func (suite *MutableIdentityTableSuite) assertNetworkPrimitives( 370 allowedIDs flow.IdentityList, 371 allowedEngs []*testutils.MeshEngine, 372 disallowedIDs flow.IdentityList, 373 disallowedEngs []*testutils.MeshEngine) { 374 suite.Run("Publish", func() { 375 suite.exchangeMessages(allowedIDs, allowedEngs, disallowedIDs, disallowedEngs, suite.Publish, false) 376 }) 377 suite.Run("Multicast", func() { 378 suite.exchangeMessages(allowedIDs, allowedEngs, disallowedIDs, disallowedEngs, suite.Multicast, false) 379 }) 380 suite.Run("Unicast", func() { 381 // unicast send from or to a node that has been evicted should fail with an error 382 suite.exchangeMessages(allowedIDs, allowedEngs, disallowedIDs, disallowedEngs, suite.Unicast, true) 383 }) 384 } 385 386 // exchangeMessages verifies that allowed engines can successfully exchange messages between them while disallowed 387 // engines can't using the ConduitSendWrapperFunc network primitive 388 func (suite *MutableIdentityTableSuite) exchangeMessages( 389 allowedIDs flow.IdentityList, 390 allowedEngs []*testutils.MeshEngine, 391 disallowedIDs flow.IdentityList, 392 disallowedEngs []*testutils.MeshEngine, 393 send testutils.ConduitSendWrapperFunc, 394 expectSendErrorForDisallowedIDs bool) { 395 396 // send a message from each of the allowed engine to the other allowed engines 397 for i, allowedEng := range allowedEngs { 398 399 fromID := allowedIDs[i].NodeID 400 targetIDs := allowedIDs.Filter(filter.Not(filter.HasNodeID[flow.Identity](allowedIDs[i].NodeID))) 401 402 err := suite.sendMessage(fromID, allowedEng, targetIDs, send) 403 require.NoError(suite.T(), err) 404 } 405 406 // send a message from each of the allowed engine to all of the disallowed engines 407 if len(disallowedEngs) > 0 { 408 for i, fromEng := range allowedEngs { 409 410 fromID := allowedIDs[i].NodeID 411 targetIDs := disallowedIDs 412 413 err := suite.sendMessage(fromID, fromEng, targetIDs, send) 414 if expectSendErrorForDisallowedIDs { 415 require.Error(suite.T(), err) 416 } 417 } 418 } 419 420 // send a message from each of the disallowed engine to each of the allowed engines 421 for i, fromEng := range disallowedEngs { 422 423 fromID := disallowedIDs[i].NodeID 424 targetIDs := allowedIDs 425 426 err := suite.sendMessage(fromID, fromEng, targetIDs, send) 427 if expectSendErrorForDisallowedIDs { 428 require.Error(suite.T(), err) 429 } 430 } 431 432 count := len(allowedEngs) 433 expectedMsgCnt := count - 1 434 wg := sync.WaitGroup{} 435 // fires a goroutine for each of the allowed engine to listen for incoming messages 436 for i := range allowedEngs { 437 wg.Add(expectedMsgCnt) 438 go func(e *testutils.MeshEngine) { 439 for x := 0; x < expectedMsgCnt; x++ { 440 <-e.Received 441 wg.Done() 442 } 443 }(allowedEngs[i]) 444 } 445 446 // assert that all allowed engines received expectedMsgCnt number of messages 447 unittest.AssertReturnsBefore(suite.T(), wg.Wait, 5*time.Second) 448 // assert that all allowed engines received no other messages 449 for i := range allowedEngs { 450 assert.Empty(suite.T(), allowedEngs[i].Received) 451 } 452 453 // assert that the disallowed engines didn't receive any message 454 for i, eng := range disallowedEngs { 455 unittest.RequireNeverClosedWithin(suite.T(), eng.Received, time.Millisecond, 456 fmt.Sprintf("%s engine should not have recevied message", disallowedIDs[i])) 457 } 458 } 459 460 func (suite *MutableIdentityTableSuite) sendMessage(fromID flow.Identifier, 461 fromEngine *testutils.MeshEngine, 462 toIDs flow.IdentityList, 463 send testutils.ConduitSendWrapperFunc) error { 464 465 primitive := runtime.FuncForPC(reflect.ValueOf(send).Pointer()).Name() 466 event := &message.TestMessage{ 467 Text: fmt.Sprintf("hello from node %s using %s", fromID.String(), primitive), 468 } 469 470 return send(event, fromEngine.Con, toIDs.NodeIDs()...) 471 }