
     1  package cohort1
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"math/rand"
     7  	"os"
     8  	"strconv"
     9  	"strings"
    10  	"sync"
    11  	"testing"
    12  	"time"
    14  	""
    15  	pubsub ""
    16  	""
    17  	""
    18  	""
    19  	""
    21  	""
    22  	""
    23  	""
    24  	""
    25  	""
    26  	""
    27  	""
    28  	""
    29  	""
    30  	""
    31  	""
    32  	p2pnode ""
    33  	p2ptest ""
    34  	""
    35  	""
    36  )
// MeshEngineTestSuite evaluates the message delivery functionality for the overlay
// of engines over a complete graph.
//
// The fields are populated by SetupTest before every test and released by TearDownTest.
type MeshEngineTestSuite struct {
	suite.Suite
	testutils.ConduitWrapper                     // used as a wrapper around conduit methods (Publish, Multicast, Unicast)
	networks                 []*underlay.Network // used to keep track of the networks
	libp2pNodes              []p2p.LibP2PNode    // used to keep track of the libp2p nodes
	ids                      flow.IdentityList   // used to keep track of the identifiers associated with networks
	obs                      chan string         // used to keep track of Protect events tagged by pubsub messages
	cancel                   context.CancelFunc  // cancels the context created in SetupTest; invoked by TearDownTest
}
    50  // TestMeshNetTestSuite runs all tests in this test suit
    51  func TestMeshNetTestSuite(t *testing.T) {
    52  	suite.Run(t, new(MeshEngineTestSuite))
    53  }
    55  // SetupTest is executed prior to each test in this test suite. It creates and initializes
    56  // a set of network instances, sets up connection managers, nodes, identities, observables, etc.
    57  // This setup ensures that all necessary configurations are in place before running the tests.
    58  func (suite *MeshEngineTestSuite) SetupTest() {
    59  	// defines total number of nodes in our network (minimum 3 needed to use 1-k messaging)
    60  	const count = 10
    61  	logger := zerolog.New(os.Stderr).Level(zerolog.ErrorLevel)
    62  	log.SetAllLoggers(log.LevelError)
    64  	// set up a channel to receive pubsub tags from connManagers of the nodes
    65  	peerChannel := make(chan string)
    67  	// Tag Observables Usage Explanation:
    68  	// The tagsObserver is used to observe connections tagged by pubsub messages. This is instrumental in understanding
    69  	// the connectivity between different peers and verifying the formation of the mesh within this test suite.
    70  	// Issues:
    71  	// - Deviation from Production Code: The usage of tag observables here may not reflect the behavior in the production environment.
    72  	// - Mask Issues in the Production Environment: The observables tied to testing might lead to behaviors or errors that are
    73  	//   masked or not evident within the actual production code.
    74  	// TODO: Evaluate the necessity of tag observables in this test and consider addressing the deviation from production
    75  	// code and potential mask issues. Evaluate the possibility of removing this part eventually.
    76  	ob := tagsObserver{
    77  		tags: peerChannel,
    78  		log:  logger,
    79  	}
    81  	ctx, cancel := context.WithCancel(context.Background())
    82  	suite.cancel = cancel
    84  	signalerCtx := irrecoverable.NewMockSignalerContext(suite.T(), ctx)
    86  	sporkId := unittest.IdentifierFixture()
    87  	libP2PNodes := make([]p2p.LibP2PNode, 0)
    88  	identities := make(flow.IdentityList, 0)
    89  	tagObservables := make([]observable.Observable, 0)
    90  	idProvider := unittest.NewUpdatableIDProvider(flow.IdentityList{})
    91  	defaultFlowConfig, err := config.DefaultConfig()
    92  	require.NoError(suite.T(), err)
    93  	opts := []p2ptest.NodeFixtureParameterOption{p2ptest.WithUnicastHandlerFunc(nil)}
    95  	for i := 0; i < count; i++ {
    96  		connManager, err := testutils.NewTagWatchingConnManager(
    97  			unittest.Logger(),
    98  			metrics.NewNoopCollector(),
    99  			&defaultFlowConfig.NetworkConfig.ConnectionManager)
   100  		require.NoError(suite.T(), err)
   102  		opts = append(opts, p2ptest.WithConnectionManager(connManager))
   103  		node, nodeId := p2ptest.NodeFixture(suite.T(),
   104  			sporkId,
   105  			suite.T().Name(),
   106  			idProvider,
   107  			opts...)
   108  		libP2PNodes = append(libP2PNodes, node)
   109  		identities = append(identities, &nodeId)
   110  		tagObservables = append(tagObservables, connManager)
   111  	}
   112  	idProvider.SetIdentities(identities)
   114  	suite.libp2pNodes = libP2PNodes
   115  	suite.ids = identities
   117  	suite.networks, _ = testutils.NetworksFixture(suite.T(), sporkId, suite.ids, suite.libp2pNodes)
   118  	// starts the nodes and networks
   119  	testutils.StartNodes(signalerCtx, suite.T(), suite.libp2pNodes)
   120  	for _, net := range suite.networks {
   121  		testutils.StartNetworks(signalerCtx, suite.T(), []network.EngineRegistry{net})
   122  		unittest.RequireComponentsReadyBefore(suite.T(), 1*time.Second, net)
   123  	}
   125  	for _, observableConnMgr := range tagObservables {
   126  		observableConnMgr.Subscribe(&ob)
   127  	}
   128  	suite.obs = peerChannel
   129  }
   131  // TearDownTest closes the networks within a specified timeout
   132  func (suite *MeshEngineTestSuite) TearDownTest() {
   133  	suite.cancel()
   134  	testutils.StopComponents(suite.T(), suite.networks, 3*time.Second)
   135  	testutils.StopComponents(suite.T(), suite.libp2pNodes, 3*time.Second)
   136  }
   138  // TestAllToAll_Publish evaluates the network of mesh engines against allToAllScenario scenario.
   139  // Network instances during this test use their Publish method to disseminate messages.
   140  func (suite *MeshEngineTestSuite) TestAllToAll_Publish() {
   141  	suite.allToAllScenario(suite.Publish)
   142  }
   144  // TestAllToAll_Multicast evaluates the network of mesh engines against allToAllScenario scenario.
   145  // Network instances during this test use their Multicast method to disseminate messages.
   146  func (suite *MeshEngineTestSuite) TestAllToAll_Multicast() {
   147  	suite.allToAllScenario(suite.Multicast)
   148  }
   150  // TestAllToAll_Unicast evaluates the network of mesh engines against allToAllScenario scenario.
   151  // Network instances during this test use their Unicast method to disseminate messages.
   152  func (suite *MeshEngineTestSuite) TestAllToAll_Unicast() {
   153  	suite.allToAllScenario(suite.Unicast)
   154  }
   156  // TestTargetedValidators_Unicast tests if only the intended recipients in a 1-k messaging actually receive the message.
   157  // The messages are disseminated through the Unicast method of conduits.
   158  func (suite *MeshEngineTestSuite) TestTargetedValidators_Unicast() {
   159  	suite.targetValidatorScenario(suite.Unicast)
   160  }
   162  // TestTargetedValidators_Multicast tests if only the intended recipients in a 1-k messaging actually receive the
   163  // message.
   164  // The messages are disseminated through the Multicast method of conduits.
   165  func (suite *MeshEngineTestSuite) TestTargetedValidators_Multicast() {
   166  	suite.targetValidatorScenario(suite.Multicast)
   167  }
   169  // TestTargetedValidators_Publish tests if only the intended recipients in a 1-k messaging actually receive the message.
   170  // The messages are disseminated through the Multicast method of conduits.
   171  func (suite *MeshEngineTestSuite) TestTargetedValidators_Publish() {
   172  	suite.targetValidatorScenario(suite.Publish)
   173  }
   175  // TestMaxMessageSize_Unicast evaluates the messageSizeScenario scenario using
   176  // the Unicast method of conduits.
   177  func (suite *MeshEngineTestSuite) TestMaxMessageSize_Unicast() {
   178  	suite.messageSizeScenario(suite.Unicast, underlay.DefaultMaxUnicastMsgSize)
   179  }
   181  // TestMaxMessageSize_Multicast evaluates the messageSizeScenario scenario using
   182  // the Multicast method of conduits.
   183  func (suite *MeshEngineTestSuite) TestMaxMessageSize_Multicast() {
   184  	suite.messageSizeScenario(suite.Multicast, p2pnode.DefaultMaxPubSubMsgSize)
   185  }
   187  // TestMaxMessageSize_Publish evaluates the messageSizeScenario scenario using the
   188  // Publish method of conduits.
   189  func (suite *MeshEngineTestSuite) TestMaxMessageSize_Publish() {
   190  	suite.messageSizeScenario(suite.Publish, p2pnode.DefaultMaxPubSubMsgSize)
   191  }
   193  // TestUnregister_Publish tests that an engine cannot send any message using Publish
   194  // or receive any messages after the conduit is closed
   195  func (suite *MeshEngineTestSuite) TestUnregister_Publish() {
   196  	suite.conduitCloseScenario(suite.Publish)
   197  }
   199  // TestUnregister_Publish tests that an engine cannot send any message using Multicast
   200  // or receive any messages after the conduit is closed
   201  func (suite *MeshEngineTestSuite) TestUnregister_Multicast() {
   202  	suite.conduitCloseScenario(suite.Multicast)
   203  }
   205  // TestUnregister_Publish tests that an engine cannot send any message using Unicast
   206  // or receive any messages after the conduit is closed
   207  func (suite *MeshEngineTestSuite) TestUnregister_Unicast() {
   208  	suite.conduitCloseScenario(suite.Unicast)
   209  }
   211  // allToAllScenario creates a complete mesh of the engines, where each engine x sends a
   212  // "hello from node x" to other engines. It then evaluates the correctness of message
   213  // delivery as well as the content of the messages. This scenario tests the capability of
   214  // the engines to communicate in a fully connected graph, ensuring both the reachability
   215  // of messages and the integrity of their contents.
   216  func (suite *MeshEngineTestSuite) allToAllScenario(send testutils.ConduitSendWrapperFunc) {
   217  	// allows nodes to find each other in case of Mulitcast and Publish
   218  	testutils.OptionalSleep(send)
   220  	// creating engines
   221  	count := len(suite.networks)
   222  	engs := make([]*testutils.MeshEngine, 0)
   223  	wg := sync.WaitGroup{}
   225  	// logs[i][j] keeps the message that node i sends to node j
   226  	logs := make(map[int][]string)
   227  	for i := range suite.networks {
   228  		eng := testutils.NewMeshEngine(suite.Suite.T(), suite.networks[i], count-1, channels.TestNetworkChannel)
   229  		engs = append(engs, eng)
   230  		logs[i] = make([]string, 0)
   231  	}
   233  	// allow nodes to heartbeat and discover each other
   234  	// each node will register ~D protect messages, where D is the default out-degree
   235  	for i := 0; i < pubsub.GossipSubD*count; i++ {
   236  		select {
   237  		case <-suite.obs:
   238  		case <-time.After(8 * time.Second):
   239  			assert.FailNow(suite.T(), "could not receive pubsub tag indicating mesh formed")
   240  		}
   241  	}
   243  	// Each node broadcasting a message to all others
   244  	for i := range suite.networks {
   245  		event := &message.TestMessage{
   246  			Text: fmt.Sprintf("hello from node %v", i),
   247  		}
   249  		// others keeps the identifier of all nodes except ith node
   250  		others := suite.ids.Filter(filter.Not(filter.HasNodeID[flow.Identity](suite.ids[i].NodeID))).NodeIDs()
   251  		require.NoError(suite.Suite.T(), send(event, engs[i].Con, others...))
   252  		wg.Add(count - 1)
   253  	}
   255  	// fires a goroutine for each engine that listens to incoming messages
   256  	for i := range suite.networks {
   257  		go func(e *testutils.MeshEngine) {
   258  			for x := 0; x < count-1; x++ {
   259  				<-e.Received
   260  				wg.Done()
   261  			}
   262  		}(engs[i])
   263  	}
   265  	unittest.AssertReturnsBefore(suite.Suite.T(), wg.Wait, 30*time.Second)
   267  	// evaluates that all messages are received
   268  	for index, e := range engs {
   269  		// confirms the number of received messages at each node
   270  		if len(e.Event) != (count - 1) {
   271  			assert.Fail(suite.Suite.T(),
   272  				fmt.Sprintf("Message reception mismatch at node %v. Expected: %v, Got: %v", index, count-1, len(e.Event)))
   273  		}
   275  		for i := 0; i < count-1; i++ {
   276  			assertChannelReceived(suite.T(), e, channels.TestNetworkChannel)
   277  		}
   279  		// extracts failed messages
   280  		receivedIndices, err := extractSenderID(count, e.Event, "hello from node")
   281  		require.NoError(suite.Suite.T(), err)
   283  		for j := 0; j < count; j++ {
   284  			// evaluates self-gossip
   285  			if j == index {
   286  				assert.False(suite.Suite.T(), (receivedIndices)[index], fmt.Sprintf("self gossiped for node %v detected", index))
   287  			}
   288  			// evaluates content
   289  			if !(receivedIndices)[j] {
   290  				assert.False(suite.Suite.T(), (receivedIndices)[index],
   291  					fmt.Sprintf("Message not found in node #%v's messages. Expected: Message from node %v. Got: No message", index, j))
   292  			}
   293  		}
   294  	}
   295  }
   297  // targetValidatorScenario sends a single message from last node to the first half of the nodes
   298  // based on identifiers list.
   299  // It then verifies that only the intended recipients receive the message.
   300  // Message dissemination is done using the send wrapper of conduit.
   301  func (suite *MeshEngineTestSuite) targetValidatorScenario(send testutils.ConduitSendWrapperFunc) {
   302  	// creating engines
   303  	count := len(suite.networks)
   304  	engs := make([]*testutils.MeshEngine, 0)
   305  	wg := sync.WaitGroup{}
   307  	for i := range suite.networks {
   308  		eng := testutils.NewMeshEngine(suite.Suite.T(), suite.networks[i], count-1, channels.TestNetworkChannel)
   309  		engs = append(engs, eng)
   310  	}
   312  	// allow nodes to heartbeat and discover each other
   313  	// each node will register ~D protect messages, where D is the default out-degree
   314  	for i := 0; i < pubsub.GossipSubD*count; i++ {
   315  		select {
   316  		case <-suite.obs:
   317  		case <-time.After(2 * time.Second):
   318  			assert.FailNow(suite.T(), "could not receive pubsub tag indicating mesh formed")
   319  		}
   320  	}
   322  	// choose half of the nodes as target
   323  	allIds := suite.ids.NodeIDs()
   324  	var targets []flow.Identifier
   325  	// create a target list of half of the nodes
   326  	for i := 0; i < len(allIds)/2; i++ {
   327  		targets = append(targets, allIds[i])
   328  	}
   330  	// node 0 broadcasting a message to all targets
   331  	event := &message.TestMessage{
   332  		Text: "hello from node 0",
   333  	}
   334  	require.NoError(suite.Suite.T(), send(event, engs[len(engs)-1].Con, targets...))
   336  	// fires a goroutine for all engines to listens for the incoming message
   337  	for i := 0; i < len(allIds)/2; i++ {
   338  		wg.Add(1)
   339  		go func(e *testutils.MeshEngine) {
   340  			<-e.Received
   341  			wg.Done()
   342  		}(engs[i])
   343  	}
   345  	unittest.AssertReturnsBefore(suite.T(), wg.Wait, 10*time.Second)
   347  	// evaluates that all messages are received
   348  	for index, e := range engs {
   349  		if index < len(engs)/2 {
   350  			assert.Len(suite.Suite.T(), e.Event, 1, fmt.Sprintf("message not received %v", index))
   351  			assertChannelReceived(suite.T(), e, channels.TestNetworkChannel)
   352  		} else {
   353  			assert.Len(suite.Suite.T(), e.Event, 0, fmt.Sprintf("message received when none was expected %v", index))
   354  		}
   355  	}
   356  }
   358  // messageSizeScenario provides a scenario to check if a message of maximum permissible size can be sent
   359  // successfully.
   360  // It broadcasts a message from the first node to all the nodes in the identifiers list using send wrapper function.
   361  func (suite *MeshEngineTestSuite) messageSizeScenario(send testutils.ConduitSendWrapperFunc, size uint) {
   362  	// creating engines
   363  	count := len(suite.networks)
   364  	engs := make([]*testutils.MeshEngine, 0)
   365  	wg := sync.WaitGroup{}
   367  	for i := range suite.networks {
   368  		eng := testutils.NewMeshEngine(suite.Suite.T(), suite.networks[i], count-1, channels.TestNetworkChannel)
   369  		engs = append(engs, eng)
   370  	}
   372  	// allow nodes to heartbeat and discover each other
   373  	// each node will register ~D protect messages per mesh setup, where D is the default out-degree
   374  	for i := 0; i < pubsub.GossipSubD*count; i++ {
   375  		select {
   376  		case <-suite.obs:
   377  		case <-time.After(8 * time.Second):
   378  			assert.FailNow(suite.T(), "could not receive pubsub tag indicating mesh formed")
   379  		}
   380  	}
   381  	// others keeps the identifier of all nodes except node that is sender.
   382  	others := suite.ids.Filter(filter.Not(filter.HasNodeID[flow.Identity](suite.ids[0].NodeID))).NodeIDs()
   384  	// generates and sends an event of custom size to the network
   385  	payload := testutils.NetworkPayloadFixture(suite.T(), size)
   386  	event := &message.TestMessage{
   387  		Text: string(payload),
   388  	}
   390  	require.NoError(suite.T(), send(event, engs[0].Con, others...))
   392  	// fires a goroutine for all engines (except sender) to listen for the incoming message
   393  	for _, eng := range engs[1:] {
   394  		wg.Add(1)
   395  		go func(e *testutils.MeshEngine) {
   396  			<-e.Received
   397  			wg.Done()
   398  		}(eng)
   399  	}
   401  	unittest.AssertReturnsBefore(suite.Suite.T(), wg.Wait, 30*time.Second)
   403  	// evaluates that all messages are received
   404  	for index, e := range engs[1:] {
   405  		assert.Len(suite.Suite.T(), e.Event, 1, "message not received by engine %d", index+1)
   406  		assertChannelReceived(suite.T(), e, channels.TestNetworkChannel)
   407  	}
   408  }
   410  // conduitCloseScenario tests after a Conduit is closed, an engine cannot send or receive a message for that channel.
   411  func (suite *MeshEngineTestSuite) conduitCloseScenario(send testutils.ConduitSendWrapperFunc) {
   413  	testutils.OptionalSleep(send)
   415  	// creating engines
   416  	count := len(suite.networks)
   417  	engs := make([]*testutils.MeshEngine, 0)
   418  	wg := sync.WaitGroup{}
   420  	for i := range suite.networks {
   421  		eng := testutils.NewMeshEngine(suite.Suite.T(), suite.networks[i], count-1, channels.TestNetworkChannel)
   422  		engs = append(engs, eng)
   423  	}
   425  	// allow nodes to heartbeat and discover each other
   426  	// each node will register ~D protect messages, where D is the default out-degree
   427  	for i := 0; i < pubsub.GossipSubD*count; i++ {
   428  		select {
   429  		case <-suite.obs:
   430  		case <-time.After(2 * time.Second):
   431  			assert.FailNow(suite.T(), "could not receive pubsub tag indicating mesh formed")
   432  		}
   433  	}
   435  	// unregister a random engine from the test topic by calling close on it's conduit
   436  	unregisterIndex := rand.Intn(count)
   437  	err := engs[unregisterIndex].Con.Close()
   438  	assert.NoError(suite.T(), err)
   440  	// waits enough for peer manager to unsubscribe the node from the topic
   441  	// while libp2p is unsubscribing the node, the topology gets unstable
   442  	// and connections to the node may be refused (although very unlikely).
   443  	time.Sleep(2 * time.Second)
   445  	// each node attempts to broadcast a message to all others
   446  	for i := range suite.networks {
   447  		event := &message.TestMessage{
   448  			Text: fmt.Sprintf("hello from node %v", i),
   449  		}
   451  		// others keeps the identifier of all nodes except ith node and the node that unregistered from the topic.
   452  		// nodes without valid topic registration for a channel will reject messages on that channel via unicast.
   453  		others := suite.ids.Filter(filter.Not(filter.HasNodeID[flow.Identity](suite.ids[i].NodeID, suite.ids[unregisterIndex].NodeID))).NodeIDs()
   455  		if i == unregisterIndex {
   456  			// assert that unsubscribed engine cannot publish on that topic
   457  			require.Error(suite.Suite.T(), send(event, engs[i].Con, others...))
   458  			continue
   459  		}
   461  		require.NoError(suite.Suite.T(), send(event, engs[i].Con, others...))
   462  	}
   464  	// fire a goroutine to listen for incoming messages for each engine except for the one which unregistered
   465  	for i := range suite.networks {
   466  		if i == unregisterIndex {
   467  			continue
   468  		}
   469  		wg.Add(1)
   470  		go func(e *testutils.MeshEngine) {
   471  			expectedMsgCnt := count - 2 // count less self and unsubscribed engine
   472  			for x := 0; x < expectedMsgCnt; x++ {
   473  				<-e.Received
   474  			}
   475  			wg.Done()
   476  		}(engs[i])
   477  	}
   479  	// assert every one except the unsubscribed engine received the message
   480  	unittest.AssertReturnsBefore(suite.Suite.T(), wg.Wait, 2*time.Second)
   482  	// assert that the unregistered engine did not receive the message
   483  	unregisteredEng := engs[unregisterIndex]
   484  	assert.Emptyf(suite.T(), unregisteredEng.Received, "unregistered engine received the topic message")
   485  }
   487  // assertChannelReceived asserts that the given channel was received on the given engine
   488  func assertChannelReceived(t *testing.T, e *testutils.MeshEngine, channel channels.Channel) {
   489  	unittest.AssertReturnsBefore(t, func() {
   490  		assert.Equal(t, channel, <-e.Channel)
   491  	}, 100*time.Millisecond)
   492  }
   494  // extractSenderID returns a bool array with the index i true if there is a message from node i in the provided messages.
   495  // enginesNum is the number of engines
   496  // events is the channel of received events
   497  // expectedMsgTxt is the common prefix among all the messages that we expect to receive, for example
   498  // we expect to receive "hello from node x" in this test, and then expectedMsgTxt is "hello form node"
   499  func extractSenderID(enginesNum int, events chan interface{}, expectedMsgTxt string) ([]bool, error) {
   500  	indices := make([]bool, enginesNum)
   501  	expectedMsgSize := len(expectedMsgTxt)
   502  	for i := 0; i < enginesNum-1; i++ {
   503  		var event interface{}
   504  		select {
   505  		case event = <-events:
   506  		default:
   507  			continue
   508  		}
   509  		echo := event.(*message.TestMessage)
   510  		msg := echo.Text
   511  		if len(msg) < expectedMsgSize {
   512  			return nil, fmt.Errorf("invalid message format")
   513  		}
   514  		senderIndex := msg[expectedMsgSize:]
   515  		senderIndex = strings.TrimLeft(senderIndex, " ")
   516  		nodeID, err := strconv.Atoi(senderIndex)
   517  		if err != nil {
   518  			return nil, fmt.Errorf("could not extract the node id from: %v", msg)
   519  		}
   521  		if indices[nodeID] {
   522  			return nil, fmt.Errorf("duplicate message reception: %v", msg)
   523  		}
   525  		if msg == fmt.Sprintf("%s %v", expectedMsgTxt, nodeID) {
   526  			indices[nodeID] = true
   527  		}
   528  	}
   529  	return indices, nil
   530  }