github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/network/test/cohort1/meshengine_test.go (about) 1 package cohort1 2 3 import ( 4 "context" 5 "fmt" 6 "math/rand" 7 "os" 8 "strconv" 9 "strings" 10 "sync" 11 "testing" 12 "time" 13 14 "github.com/ipfs/go-log" 15 pubsub "github.com/libp2p/go-libp2p-pubsub" 16 "github.com/rs/zerolog" 17 "github.com/stretchr/testify/assert" 18 "github.com/stretchr/testify/require" 19 "github.com/stretchr/testify/suite" 20 21 "github.com/onflow/flow-go/config" 22 "github.com/onflow/flow-go/model/flow" 23 "github.com/onflow/flow-go/model/flow/filter" 24 "github.com/onflow/flow-go/model/libp2p/message" 25 "github.com/onflow/flow-go/module/irrecoverable" 26 "github.com/onflow/flow-go/module/metrics" 27 "github.com/onflow/flow-go/module/observable" 28 "github.com/onflow/flow-go/network" 29 "github.com/onflow/flow-go/network/channels" 30 "github.com/onflow/flow-go/network/internal/testutils" 31 "github.com/onflow/flow-go/network/p2p" 32 p2pnode "github.com/onflow/flow-go/network/p2p/node" 33 p2ptest "github.com/onflow/flow-go/network/p2p/test" 34 "github.com/onflow/flow-go/network/underlay" 35 "github.com/onflow/flow-go/utils/unittest" 36 ) 37 38 // MeshEngineTestSuite evaluates the message delivery functionality for the overlay 39 // of engines over a complete graph 40 type MeshEngineTestSuite struct { 41 suite.Suite 42 testutils.ConduitWrapper // used as a wrapper around conduit methods 43 networks []*underlay.Network // used to keep track of the networks 44 libp2pNodes []p2p.LibP2PNode // used to keep track of the libp2p nodes 45 ids flow.IdentityList // used to keep track of the identifiers associated with networks 46 obs chan string // used to keep track of Protect events tagged by pubsub messages 47 cancel context.CancelFunc 48 } 49 50 // TestMeshNetTestSuite runs all tests in this test suit 51 func TestMeshNetTestSuite(t *testing.T) { 52 suite.Run(t, new(MeshEngineTestSuite)) 53 } 54 55 // SetupTest is executed prior to each test in this test suite. It creates and initializes 56 // a set of network instances, sets up connection managers, nodes, identities, observables, etc. 57 // This setup ensures that all necessary configurations are in place before running the tests. 58 func (suite *MeshEngineTestSuite) SetupTest() { 59 // defines total number of nodes in our network (minimum 3 needed to use 1-k messaging) 60 const count = 10 61 logger := zerolog.New(os.Stderr).Level(zerolog.ErrorLevel) 62 log.SetAllLoggers(log.LevelError) 63 64 // set up a channel to receive pubsub tags from connManagers of the nodes 65 peerChannel := make(chan string) 66 67 // Tag Observables Usage Explanation: 68 // The tagsObserver is used to observe connections tagged by pubsub messages. This is instrumental in understanding 69 // the connectivity between different peers and verifying the formation of the mesh within this test suite. 70 // Issues: 71 // - Deviation from Production Code: The usage of tag observables here may not reflect the behavior in the production environment. 72 // - Mask Issues in the Production Environment: The observables tied to testing might lead to behaviors or errors that are 73 // masked or not evident within the actual production code. 74 // TODO: Evaluate the necessity of tag observables in this test and consider addressing the deviation from production 75 // code and potential mask issues. Evaluate the possibility of removing this part eventually. 76 ob := tagsObserver{ 77 tags: peerChannel, 78 log: logger, 79 } 80 81 ctx, cancel := context.WithCancel(context.Background()) 82 suite.cancel = cancel 83 84 signalerCtx := irrecoverable.NewMockSignalerContext(suite.T(), ctx) 85 86 sporkId := unittest.IdentifierFixture() 87 libP2PNodes := make([]p2p.LibP2PNode, 0) 88 identities := make(flow.IdentityList, 0) 89 tagObservables := make([]observable.Observable, 0) 90 idProvider := unittest.NewUpdatableIDProvider(flow.IdentityList{}) 91 defaultFlowConfig, err := config.DefaultConfig() 92 require.NoError(suite.T(), err) 93 opts := []p2ptest.NodeFixtureParameterOption{p2ptest.WithUnicastHandlerFunc(nil)} 94 95 for i := 0; i < count; i++ { 96 connManager, err := testutils.NewTagWatchingConnManager( 97 unittest.Logger(), 98 metrics.NewNoopCollector(), 99 &defaultFlowConfig.NetworkConfig.ConnectionManager) 100 require.NoError(suite.T(), err) 101 102 opts = append(opts, p2ptest.WithConnectionManager(connManager)) 103 node, nodeId := p2ptest.NodeFixture(suite.T(), 104 sporkId, 105 suite.T().Name(), 106 idProvider, 107 opts...) 108 libP2PNodes = append(libP2PNodes, node) 109 identities = append(identities, &nodeId) 110 tagObservables = append(tagObservables, connManager) 111 } 112 idProvider.SetIdentities(identities) 113 114 suite.libp2pNodes = libP2PNodes 115 suite.ids = identities 116 117 suite.networks, _ = testutils.NetworksFixture(suite.T(), sporkId, suite.ids, suite.libp2pNodes) 118 // starts the nodes and networks 119 testutils.StartNodes(signalerCtx, suite.T(), suite.libp2pNodes) 120 for _, net := range suite.networks { 121 testutils.StartNetworks(signalerCtx, suite.T(), []network.EngineRegistry{net}) 122 unittest.RequireComponentsReadyBefore(suite.T(), 1*time.Second, net) 123 } 124 125 for _, observableConnMgr := range tagObservables { 126 observableConnMgr.Subscribe(&ob) 127 } 128 suite.obs = peerChannel 129 } 130 131 // TearDownTest closes the networks within a specified timeout 132 func (suite *MeshEngineTestSuite) TearDownTest() { 133 suite.cancel() 134 testutils.StopComponents(suite.T(), suite.networks, 3*time.Second) 135 testutils.StopComponents(suite.T(), suite.libp2pNodes, 3*time.Second) 136 } 137 138 // TestAllToAll_Publish evaluates the network of mesh engines against allToAllScenario scenario. 139 // Network instances during this test use their Publish method to disseminate messages. 140 func (suite *MeshEngineTestSuite) TestAllToAll_Publish() { 141 suite.allToAllScenario(suite.Publish) 142 } 143 144 // TestAllToAll_Multicast evaluates the network of mesh engines against allToAllScenario scenario. 145 // Network instances during this test use their Multicast method to disseminate messages. 146 func (suite *MeshEngineTestSuite) TestAllToAll_Multicast() { 147 suite.allToAllScenario(suite.Multicast) 148 } 149 150 // TestAllToAll_Unicast evaluates the network of mesh engines against allToAllScenario scenario. 151 // Network instances during this test use their Unicast method to disseminate messages. 152 func (suite *MeshEngineTestSuite) TestAllToAll_Unicast() { 153 suite.allToAllScenario(suite.Unicast) 154 } 155 156 // TestTargetedValidators_Unicast tests if only the intended recipients in a 1-k messaging actually receive the message. 157 // The messages are disseminated through the Unicast method of conduits. 158 func (suite *MeshEngineTestSuite) TestTargetedValidators_Unicast() { 159 suite.targetValidatorScenario(suite.Unicast) 160 } 161 162 // TestTargetedValidators_Multicast tests if only the intended recipients in a 1-k messaging actually receive the 163 // message. 164 // The messages are disseminated through the Multicast method of conduits. 165 func (suite *MeshEngineTestSuite) TestTargetedValidators_Multicast() { 166 suite.targetValidatorScenario(suite.Multicast) 167 } 168 169 // TestTargetedValidators_Publish tests if only the intended recipients in a 1-k messaging actually receive the message. 170 // The messages are disseminated through the Multicast method of conduits. 171 func (suite *MeshEngineTestSuite) TestTargetedValidators_Publish() { 172 suite.targetValidatorScenario(suite.Publish) 173 } 174 175 // TestMaxMessageSize_Unicast evaluates the messageSizeScenario scenario using 176 // the Unicast method of conduits. 177 func (suite *MeshEngineTestSuite) TestMaxMessageSize_Unicast() { 178 suite.messageSizeScenario(suite.Unicast, underlay.DefaultMaxUnicastMsgSize) 179 } 180 181 // TestMaxMessageSize_Multicast evaluates the messageSizeScenario scenario using 182 // the Multicast method of conduits. 183 func (suite *MeshEngineTestSuite) TestMaxMessageSize_Multicast() { 184 suite.messageSizeScenario(suite.Multicast, p2pnode.DefaultMaxPubSubMsgSize) 185 } 186 187 // TestMaxMessageSize_Publish evaluates the messageSizeScenario scenario using the 188 // Publish method of conduits. 189 func (suite *MeshEngineTestSuite) TestMaxMessageSize_Publish() { 190 suite.messageSizeScenario(suite.Publish, p2pnode.DefaultMaxPubSubMsgSize) 191 } 192 193 // TestUnregister_Publish tests that an engine cannot send any message using Publish 194 // or receive any messages after the conduit is closed 195 func (suite *MeshEngineTestSuite) TestUnregister_Publish() { 196 suite.conduitCloseScenario(suite.Publish) 197 } 198 199 // TestUnregister_Publish tests that an engine cannot send any message using Multicast 200 // or receive any messages after the conduit is closed 201 func (suite *MeshEngineTestSuite) TestUnregister_Multicast() { 202 suite.conduitCloseScenario(suite.Multicast) 203 } 204 205 // TestUnregister_Publish tests that an engine cannot send any message using Unicast 206 // or receive any messages after the conduit is closed 207 func (suite *MeshEngineTestSuite) TestUnregister_Unicast() { 208 suite.conduitCloseScenario(suite.Unicast) 209 } 210 211 // allToAllScenario creates a complete mesh of the engines, where each engine x sends a 212 // "hello from node x" to other engines. It then evaluates the correctness of message 213 // delivery as well as the content of the messages. This scenario tests the capability of 214 // the engines to communicate in a fully connected graph, ensuring both the reachability 215 // of messages and the integrity of their contents. 216 func (suite *MeshEngineTestSuite) allToAllScenario(send testutils.ConduitSendWrapperFunc) { 217 // allows nodes to find each other in case of Mulitcast and Publish 218 testutils.OptionalSleep(send) 219 220 // creating engines 221 count := len(suite.networks) 222 engs := make([]*testutils.MeshEngine, 0) 223 wg := sync.WaitGroup{} 224 225 // logs[i][j] keeps the message that node i sends to node j 226 logs := make(map[int][]string) 227 for i := range suite.networks { 228 eng := testutils.NewMeshEngine(suite.Suite.T(), suite.networks[i], count-1, channels.TestNetworkChannel) 229 engs = append(engs, eng) 230 logs[i] = make([]string, 0) 231 } 232 233 // allow nodes to heartbeat and discover each other 234 // each node will register ~D protect messages, where D is the default out-degree 235 for i := 0; i < pubsub.GossipSubD*count; i++ { 236 select { 237 case <-suite.obs: 238 case <-time.After(8 * time.Second): 239 assert.FailNow(suite.T(), "could not receive pubsub tag indicating mesh formed") 240 } 241 } 242 243 // Each node broadcasting a message to all others 244 for i := range suite.networks { 245 event := &message.TestMessage{ 246 Text: fmt.Sprintf("hello from node %v", i), 247 } 248 249 // others keeps the identifier of all nodes except ith node 250 others := suite.ids.Filter(filter.Not(filter.HasNodeID[flow.Identity](suite.ids[i].NodeID))).NodeIDs() 251 require.NoError(suite.Suite.T(), send(event, engs[i].Con, others...)) 252 wg.Add(count - 1) 253 } 254 255 // fires a goroutine for each engine that listens to incoming messages 256 for i := range suite.networks { 257 go func(e *testutils.MeshEngine) { 258 for x := 0; x < count-1; x++ { 259 <-e.Received 260 wg.Done() 261 } 262 }(engs[i]) 263 } 264 265 unittest.AssertReturnsBefore(suite.Suite.T(), wg.Wait, 30*time.Second) 266 267 // evaluates that all messages are received 268 for index, e := range engs { 269 // confirms the number of received messages at each node 270 if len(e.Event) != (count - 1) { 271 assert.Fail(suite.Suite.T(), 272 fmt.Sprintf("Message reception mismatch at node %v. Expected: %v, Got: %v", index, count-1, len(e.Event))) 273 } 274 275 for i := 0; i < count-1; i++ { 276 assertChannelReceived(suite.T(), e, channels.TestNetworkChannel) 277 } 278 279 // extracts failed messages 280 receivedIndices, err := extractSenderID(count, e.Event, "hello from node") 281 require.NoError(suite.Suite.T(), err) 282 283 for j := 0; j < count; j++ { 284 // evaluates self-gossip 285 if j == index { 286 assert.False(suite.Suite.T(), (receivedIndices)[index], fmt.Sprintf("self gossiped for node %v detected", index)) 287 } 288 // evaluates content 289 if !(receivedIndices)[j] { 290 assert.False(suite.Suite.T(), (receivedIndices)[index], 291 fmt.Sprintf("Message not found in node #%v's messages. Expected: Message from node %v. Got: No message", index, j)) 292 } 293 } 294 } 295 } 296 297 // targetValidatorScenario sends a single message from last node to the first half of the nodes 298 // based on identifiers list. 299 // It then verifies that only the intended recipients receive the message. 300 // Message dissemination is done using the send wrapper of conduit. 301 func (suite *MeshEngineTestSuite) targetValidatorScenario(send testutils.ConduitSendWrapperFunc) { 302 // creating engines 303 count := len(suite.networks) 304 engs := make([]*testutils.MeshEngine, 0) 305 wg := sync.WaitGroup{} 306 307 for i := range suite.networks { 308 eng := testutils.NewMeshEngine(suite.Suite.T(), suite.networks[i], count-1, channels.TestNetworkChannel) 309 engs = append(engs, eng) 310 } 311 312 // allow nodes to heartbeat and discover each other 313 // each node will register ~D protect messages, where D is the default out-degree 314 for i := 0; i < pubsub.GossipSubD*count; i++ { 315 select { 316 case <-suite.obs: 317 case <-time.After(2 * time.Second): 318 assert.FailNow(suite.T(), "could not receive pubsub tag indicating mesh formed") 319 } 320 } 321 322 // choose half of the nodes as target 323 allIds := suite.ids.NodeIDs() 324 var targets []flow.Identifier 325 // create a target list of half of the nodes 326 for i := 0; i < len(allIds)/2; i++ { 327 targets = append(targets, allIds[i]) 328 } 329 330 // node 0 broadcasting a message to all targets 331 event := &message.TestMessage{ 332 Text: "hello from node 0", 333 } 334 require.NoError(suite.Suite.T(), send(event, engs[len(engs)-1].Con, targets...)) 335 336 // fires a goroutine for all engines to listens for the incoming message 337 for i := 0; i < len(allIds)/2; i++ { 338 wg.Add(1) 339 go func(e *testutils.MeshEngine) { 340 <-e.Received 341 wg.Done() 342 }(engs[i]) 343 } 344 345 unittest.AssertReturnsBefore(suite.T(), wg.Wait, 10*time.Second) 346 347 // evaluates that all messages are received 348 for index, e := range engs { 349 if index < len(engs)/2 { 350 assert.Len(suite.Suite.T(), e.Event, 1, fmt.Sprintf("message not received %v", index)) 351 assertChannelReceived(suite.T(), e, channels.TestNetworkChannel) 352 } else { 353 assert.Len(suite.Suite.T(), e.Event, 0, fmt.Sprintf("message received when none was expected %v", index)) 354 } 355 } 356 } 357 358 // messageSizeScenario provides a scenario to check if a message of maximum permissible size can be sent 359 // successfully. 360 // It broadcasts a message from the first node to all the nodes in the identifiers list using send wrapper function. 361 func (suite *MeshEngineTestSuite) messageSizeScenario(send testutils.ConduitSendWrapperFunc, size uint) { 362 // creating engines 363 count := len(suite.networks) 364 engs := make([]*testutils.MeshEngine, 0) 365 wg := sync.WaitGroup{} 366 367 for i := range suite.networks { 368 eng := testutils.NewMeshEngine(suite.Suite.T(), suite.networks[i], count-1, channels.TestNetworkChannel) 369 engs = append(engs, eng) 370 } 371 372 // allow nodes to heartbeat and discover each other 373 // each node will register ~D protect messages per mesh setup, where D is the default out-degree 374 for i := 0; i < pubsub.GossipSubD*count; i++ { 375 select { 376 case <-suite.obs: 377 case <-time.After(8 * time.Second): 378 assert.FailNow(suite.T(), "could not receive pubsub tag indicating mesh formed") 379 } 380 } 381 // others keeps the identifier of all nodes except node that is sender. 382 others := suite.ids.Filter(filter.Not(filter.HasNodeID[flow.Identity](suite.ids[0].NodeID))).NodeIDs() 383 384 // generates and sends an event of custom size to the network 385 payload := testutils.NetworkPayloadFixture(suite.T(), size) 386 event := &message.TestMessage{ 387 Text: string(payload), 388 } 389 390 require.NoError(suite.T(), send(event, engs[0].Con, others...)) 391 392 // fires a goroutine for all engines (except sender) to listen for the incoming message 393 for _, eng := range engs[1:] { 394 wg.Add(1) 395 go func(e *testutils.MeshEngine) { 396 <-e.Received 397 wg.Done() 398 }(eng) 399 } 400 401 unittest.AssertReturnsBefore(suite.Suite.T(), wg.Wait, 30*time.Second) 402 403 // evaluates that all messages are received 404 for index, e := range engs[1:] { 405 assert.Len(suite.Suite.T(), e.Event, 1, "message not received by engine %d", index+1) 406 assertChannelReceived(suite.T(), e, channels.TestNetworkChannel) 407 } 408 } 409 410 // conduitCloseScenario tests after a Conduit is closed, an engine cannot send or receive a message for that channel. 411 func (suite *MeshEngineTestSuite) conduitCloseScenario(send testutils.ConduitSendWrapperFunc) { 412 413 testutils.OptionalSleep(send) 414 415 // creating engines 416 count := len(suite.networks) 417 engs := make([]*testutils.MeshEngine, 0) 418 wg := sync.WaitGroup{} 419 420 for i := range suite.networks { 421 eng := testutils.NewMeshEngine(suite.Suite.T(), suite.networks[i], count-1, channels.TestNetworkChannel) 422 engs = append(engs, eng) 423 } 424 425 // allow nodes to heartbeat and discover each other 426 // each node will register ~D protect messages, where D is the default out-degree 427 for i := 0; i < pubsub.GossipSubD*count; i++ { 428 select { 429 case <-suite.obs: 430 case <-time.After(2 * time.Second): 431 assert.FailNow(suite.T(), "could not receive pubsub tag indicating mesh formed") 432 } 433 } 434 435 // unregister a random engine from the test topic by calling close on it's conduit 436 unregisterIndex := rand.Intn(count) 437 err := engs[unregisterIndex].Con.Close() 438 assert.NoError(suite.T(), err) 439 440 // waits enough for peer manager to unsubscribe the node from the topic 441 // while libp2p is unsubscribing the node, the topology gets unstable 442 // and connections to the node may be refused (although very unlikely). 443 time.Sleep(2 * time.Second) 444 445 // each node attempts to broadcast a message to all others 446 for i := range suite.networks { 447 event := &message.TestMessage{ 448 Text: fmt.Sprintf("hello from node %v", i), 449 } 450 451 // others keeps the identifier of all nodes except ith node and the node that unregistered from the topic. 452 // nodes without valid topic registration for a channel will reject messages on that channel via unicast. 453 others := suite.ids.Filter(filter.Not(filter.HasNodeID[flow.Identity](suite.ids[i].NodeID, suite.ids[unregisterIndex].NodeID))).NodeIDs() 454 455 if i == unregisterIndex { 456 // assert that unsubscribed engine cannot publish on that topic 457 require.Error(suite.Suite.T(), send(event, engs[i].Con, others...)) 458 continue 459 } 460 461 require.NoError(suite.Suite.T(), send(event, engs[i].Con, others...)) 462 } 463 464 // fire a goroutine to listen for incoming messages for each engine except for the one which unregistered 465 for i := range suite.networks { 466 if i == unregisterIndex { 467 continue 468 } 469 wg.Add(1) 470 go func(e *testutils.MeshEngine) { 471 expectedMsgCnt := count - 2 // count less self and unsubscribed engine 472 for x := 0; x < expectedMsgCnt; x++ { 473 <-e.Received 474 } 475 wg.Done() 476 }(engs[i]) 477 } 478 479 // assert every one except the unsubscribed engine received the message 480 unittest.AssertReturnsBefore(suite.Suite.T(), wg.Wait, 2*time.Second) 481 482 // assert that the unregistered engine did not receive the message 483 unregisteredEng := engs[unregisterIndex] 484 assert.Emptyf(suite.T(), unregisteredEng.Received, "unregistered engine received the topic message") 485 } 486 487 // assertChannelReceived asserts that the given channel was received on the given engine 488 func assertChannelReceived(t *testing.T, e *testutils.MeshEngine, channel channels.Channel) { 489 unittest.AssertReturnsBefore(t, func() { 490 assert.Equal(t, channel, <-e.Channel) 491 }, 100*time.Millisecond) 492 } 493 494 // extractSenderID returns a bool array with the index i true if there is a message from node i in the provided messages. 495 // enginesNum is the number of engines 496 // events is the channel of received events 497 // expectedMsgTxt is the common prefix among all the messages that we expect to receive, for example 498 // we expect to receive "hello from node x" in this test, and then expectedMsgTxt is "hello form node" 499 func extractSenderID(enginesNum int, events chan interface{}, expectedMsgTxt string) ([]bool, error) { 500 indices := make([]bool, enginesNum) 501 expectedMsgSize := len(expectedMsgTxt) 502 for i := 0; i < enginesNum-1; i++ { 503 var event interface{} 504 select { 505 case event = <-events: 506 default: 507 continue 508 } 509 echo := event.(*message.TestMessage) 510 msg := echo.Text 511 if len(msg) < expectedMsgSize { 512 return nil, fmt.Errorf("invalid message format") 513 } 514 senderIndex := msg[expectedMsgSize:] 515 senderIndex = strings.TrimLeft(senderIndex, " ") 516 nodeID, err := strconv.Atoi(senderIndex) 517 if err != nil { 518 return nil, fmt.Errorf("could not extract the node id from: %v", msg) 519 } 520 521 if indices[nodeID] { 522 return nil, fmt.Errorf("duplicate message reception: %v", msg) 523 } 524 525 if msg == fmt.Sprintf("%s %v", expectedMsgTxt, nodeID) { 526 indices[nodeID] = true 527 } 528 } 529 return indices, nil 530 }