github.com/koko1123/flow-go-1@v0.29.6/network/test/epochtransition_test.go (about)

     1  package test
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"math/rand"
     7  	"os"
     8  	"reflect"
     9  	"runtime"
    10  	"sync"
    11  	"testing"
    12  	"time"
    13  
    14  	"github.com/ipfs/go-log"
    15  	"github.com/rs/zerolog"
    16  	"github.com/stretchr/testify/assert"
    17  	"github.com/stretchr/testify/mock"
    18  	"github.com/stretchr/testify/require"
    19  	"github.com/stretchr/testify/suite"
    20  
    21  	"github.com/koko1123/flow-go-1/model/flow"
    22  	"github.com/koko1123/flow-go-1/model/flow/filter"
    23  	"github.com/koko1123/flow-go-1/model/libp2p/message"
    24  	"github.com/koko1123/flow-go-1/module/irrecoverable"
    25  	"github.com/koko1123/flow-go-1/network"
    26  	"github.com/koko1123/flow-go-1/network/internal/testutils"
    27  	"github.com/koko1123/flow-go-1/network/mocknetwork"
    28  	mockprotocol "github.com/koko1123/flow-go-1/state/protocol/mock"
    29  	"github.com/koko1123/flow-go-1/utils/unittest"
    30  )
    31  
// MutableIdentityTableSuite tests that the networking layer responds correctly
// to changes to the identity table. When nodes are added, we should update our
// topology and accept connections from these new nodes. When nodes are removed
// or ejected we should update our topology and restrict connections from these
// nodes.
type MutableIdentityTableSuite struct {
	suite.Suite
	testutils.ConduitWrapper
	testNodes        testNodeList           // nodes whose identities the snapshot mock currently reports
	removedTestNodes testNodeList           // test nodes which might have been removed from the mesh
	state            *mockprotocol.State    // protocol state mock; its Final call returns snapshot
	snapshot         *mockprotocol.Snapshot // snapshot mock; its Identities call returns the ids of testNodes
	logger           zerolog.Logger         // logger passed to the generated nodes
	cancels          []context.CancelFunc   // cancel functions of the contexts the nodes were started with
}
    47  
// testNode encapsulates the state the suite keeps per node: its identity, its middleware,
// its network and the mesh engine generated for that network.
type testNode struct {
	id     *flow.Identity
	mw     network.Middleware
	net    network.Network
	engine *testutils.MeshEngine
}
    56  
// testNodeList encapsulates a list of test nodes and
// has functions to retrieve the different elements of the test nodes in a concurrency safe manner.
// The embedded RWMutex guards nodes; since the struct embeds a mutex, its methods use pointer receivers.
type testNodeList struct {
	sync.RWMutex
	nodes []testNode
}
    63  
    64  func newTestNodeList() testNodeList {
    65  	return testNodeList{}
    66  }
    67  
    68  func (t *testNodeList) append(node testNode) {
    69  	t.Lock()
    70  	defer t.Unlock()
    71  	t.nodes = append(t.nodes, node)
    72  }
    73  
    74  func (t *testNodeList) remove() testNode {
    75  	t.Lock()
    76  	defer t.Unlock()
    77  	// choose a random node to remove
    78  	i := rand.Intn(len(t.nodes))
    79  	removedNode := t.nodes[i]
    80  	t.nodes = append(t.nodes[:i], t.nodes[i+1:]...)
    81  	return removedNode
    82  }
    83  
    84  func (t *testNodeList) ids() flow.IdentityList {
    85  	t.RLock()
    86  	defer t.RUnlock()
    87  	ids := make(flow.IdentityList, len(t.nodes))
    88  	for i, node := range t.nodes {
    89  		ids[i] = node.id
    90  	}
    91  	return ids
    92  }
    93  
    94  func (t *testNodeList) lastAdded() (testNode, error) {
    95  	t.RLock()
    96  	defer t.RUnlock()
    97  	if len(t.nodes) > 0 {
    98  		return t.nodes[len(t.nodes)-1], nil
    99  	}
   100  	return testNode{}, fmt.Errorf("node list empty")
   101  }
   102  
   103  func (t *testNodeList) engines() []*testutils.MeshEngine {
   104  	t.RLock()
   105  	defer t.RUnlock()
   106  	engs := make([]*testutils.MeshEngine, len(t.nodes))
   107  	for i, node := range t.nodes {
   108  		engs[i] = node.engine
   109  	}
   110  	return engs
   111  }
   112  
   113  func (t *testNodeList) networks() []network.Network {
   114  	t.RLock()
   115  	defer t.RUnlock()
   116  	nets := make([]network.Network, len(t.nodes))
   117  	for i, node := range t.nodes {
   118  		nets[i] = node.net
   119  	}
   120  	return nets
   121  }
   122  
   123  func TestMutableIdentityTable(t *testing.T) {
   124  	unittest.SkipUnless(t, unittest.TEST_TODO, "broken test")
   125  	suite.Run(t, new(MutableIdentityTableSuite))
   126  }
   127  
   128  // signalIdentityChanged update IDs for all the current set of nodes (simulating an epoch)
   129  func (suite *MutableIdentityTableSuite) signalIdentityChanged() {
   130  	for _, n := range suite.testNodes.nodes {
   131  		n.mw.UpdateNodeAddresses()
   132  	}
   133  }
   134  
   135  func (suite *MutableIdentityTableSuite) SetupTest() {
   136  	suite.testNodes = newTestNodeList()
   137  	suite.removedTestNodes = newTestNodeList()
   138  	rand.Seed(time.Now().UnixNano())
   139  	nodeCount := 10
   140  	suite.logger = zerolog.New(os.Stderr).Level(zerolog.ErrorLevel)
   141  	log.SetAllLoggers(log.LevelError)
   142  
   143  	suite.setupStateMock()
   144  	suite.addNodes(nodeCount)
   145  
   146  	// simulate a start of an epoch by signaling a change in the identity table
   147  	suite.signalIdentityChanged()
   148  
   149  	// wait for two lip2p heatbeats for the nodes to discover each other and form the mesh
   150  	time.Sleep(2 * time.Second)
   151  }
   152  
   153  // TearDownTest closes all the networks within a specified timeout
   154  func (suite *MutableIdentityTableSuite) TearDownTest() {
   155  	for _, cancel := range suite.cancels {
   156  		cancel()
   157  	}
   158  	networks := append(suite.testNodes.networks(), suite.removedTestNodes.networks()...)
   159  	testutils.StopComponents(suite.T(), networks, 3*time.Second)
   160  }
   161  
   162  // setupStateMock setup state related mocks (all networks share the same state mock)
   163  func (suite *MutableIdentityTableSuite) setupStateMock() {
   164  	final := unittest.BlockHeaderFixture()
   165  	suite.state = new(mockprotocol.State)
   166  	suite.snapshot = new(mockprotocol.Snapshot)
   167  	suite.snapshot.On("Head").Return(&final, nil)
   168  	suite.snapshot.On("Phase").Return(flow.EpochPhaseCommitted, nil)
   169  	// return all the current list of ids for the state.Final.Identities call made by the network
   170  	suite.snapshot.On("Identities", mock.Anything).Return(
   171  		func(flow.IdentityFilter) flow.IdentityList {
   172  			return suite.testNodes.ids()
   173  		},
   174  		func(flow.IdentityFilter) error { return nil })
   175  	suite.state.On("Final").Return(suite.snapshot, nil)
   176  }
   177  
   178  // addNodes creates count many new nodes and appends them to the suite state variables
   179  func (suite *MutableIdentityTableSuite) addNodes(count int) {
   180  	ctx, cancel := context.WithCancel(context.Background())
   181  	signalerCtx := irrecoverable.NewMockSignalerContext(suite.T(), ctx)
   182  
   183  	// create the ids, middlewares and networks
   184  	ids, nodes, mws, nets, _ := testutils.GenerateIDsMiddlewaresNetworks(
   185  		suite.T(),
   186  		count,
   187  		suite.logger,
   188  		unittest.NetworkCodec(),
   189  		mocknetwork.NewViolationsConsumer(suite.T()),
   190  	)
   191  	suite.cancels = append(suite.cancels, cancel)
   192  
   193  	testutils.StartNodesAndNetworks(signalerCtx, suite.T(), nodes, nets, 100*time.Millisecond)
   194  
   195  	// create the engines for the new nodes
   196  	engines := testutils.GenerateEngines(suite.T(), nets)
   197  
   198  	// create the test engines
   199  	for i := 0; i < count; i++ {
   200  		node := testNode{
   201  			id:     ids[i],
   202  			mw:     mws[i],
   203  			net:    nets[i],
   204  			engine: engines[i],
   205  		}
   206  		suite.testNodes.append(node)
   207  	}
   208  }
   209  
   210  // removeNode removes a randomly chosen test node from suite.testNodes and adds it to suite.removedTestNodes
   211  func (suite *MutableIdentityTableSuite) removeNode() testNode {
   212  	removedNode := suite.testNodes.remove()
   213  	suite.removedTestNodes.append(removedNode)
   214  	return removedNode
   215  }
   216  
   217  // TestNewNodeAdded tests that when a new node is added to the identity list e.g. on an epoch,
   218  // then it can connect to the network.
   219  func (suite *MutableIdentityTableSuite) TestNewNodeAdded() {
   220  
   221  	// add a new node the current list of nodes
   222  	suite.addNodes(1)
   223  
   224  	newNode, err := suite.testNodes.lastAdded()
   225  	require.NoError(suite.T(), err)
   226  	newID := newNode.id
   227  	newMiddleware := newNode.mw
   228  
   229  	suite.logger.Debug().
   230  		Str("new_node", newID.NodeID.String()).
   231  		Msg("added one node")
   232  
   233  	// update IDs for all the networks (simulating an epoch)
   234  	suite.signalIdentityChanged()
   235  
   236  	ids := suite.testNodes.ids()
   237  	engs := suite.testNodes.engines()
   238  
   239  	// check if the new node has sufficient connections with the existing nodes
   240  	// if it does, then it has been inducted successfully in the network
   241  	suite.assertConnected(newMiddleware, ids.Filter(filter.Not(filter.HasNodeID(newID.NodeID))))
   242  
   243  	// check that all the engines on this new epoch can talk to each other using any of the three networking primitives
   244  	suite.assertNetworkPrimitives(ids, engs, nil, nil)
   245  }
   246  
   247  // TestNodeRemoved tests that when an existing node is removed from the identity
   248  // list (ie. as a result of an ejection or transition into an epoch where that node
   249  // has un-staked) then it cannot connect to the network.
   250  func (suite *MutableIdentityTableSuite) TestNodeRemoved() {
   251  
   252  	// removed a node
   253  	removedNode := suite.removeNode()
   254  	removedID := removedNode.id
   255  	removedMiddleware := removedNode.mw
   256  	removedEngine := removedNode.engine
   257  
   258  	// update IDs for all the remaining nodes
   259  	// the removed node continues with the old identity list as we don't want to rely on it updating its ids list
   260  	suite.signalIdentityChanged()
   261  
   262  	remainingIDs := suite.testNodes.ids()
   263  	remainingEngs := suite.testNodes.engines()
   264  
   265  	// assert that the removed node has no connections with any of the other nodes
   266  	suite.assertDisconnected(removedMiddleware, remainingIDs)
   267  
   268  	// check that all remaining engines can still talk to each other while the ones removed can't
   269  	// using any of the three networking primitives
   270  	removedIDs := []*flow.Identity{removedID}
   271  	removedEngines := []*testutils.MeshEngine{removedEngine}
   272  
   273  	// assert that all three network primitives still work
   274  	suite.assertNetworkPrimitives(remainingIDs, remainingEngs, removedIDs, removedEngines)
   275  }
   276  
   277  // TestNodesAddedAndRemoved tests that:
   278  // a. a newly added node can exchange messages with the existing nodes
   279  // b. a node that has has been removed cannot exchange messages with the existing nodes
   280  func (suite *MutableIdentityTableSuite) TestNodesAddedAndRemoved() {
   281  
   282  	// remove a node
   283  	removedNode := suite.removeNode()
   284  	removedID := removedNode.id
   285  	removedMiddleware := removedNode.mw
   286  	removedEngine := removedNode.engine
   287  
   288  	// add a node
   289  	suite.addNodes(1)
   290  	newNode, err := suite.testNodes.lastAdded()
   291  	require.NoError(suite.T(), err)
   292  	newID := newNode.id
   293  	newMiddleware := newNode.mw
   294  
   295  	// update all current nodes
   296  	suite.signalIdentityChanged()
   297  
   298  	remainingIDs := suite.testNodes.ids()
   299  	remainingEngs := suite.testNodes.engines()
   300  
   301  	// check if the new node has sufficient connections with the existing nodes
   302  	suite.assertConnected(newMiddleware, remainingIDs.Filter(filter.Not(filter.HasNodeID(newID.NodeID))))
   303  
   304  	// assert that the removed node has no connections with any of the other nodes
   305  	suite.assertDisconnected(removedMiddleware, remainingIDs)
   306  
   307  	// check that all remaining engines can still talk to each other while the ones removed can't
   308  	// using any of the three networking primitives
   309  	removedIDs := []*flow.Identity{removedID}
   310  	removedEngines := []*testutils.MeshEngine{removedEngine}
   311  
   312  	// assert that all three network primitives still work
   313  	suite.assertNetworkPrimitives(remainingIDs, remainingEngs, removedIDs, removedEngines)
   314  }
   315  
   316  // assertConnected checks that the middleware of a node is directly connected
   317  // to at least half of the other nodes.
   318  func (suite *MutableIdentityTableSuite) assertConnected(mw network.Middleware, ids flow.IdentityList) {
   319  	t := suite.T()
   320  	threshold := len(ids) / 2
   321  	require.Eventuallyf(t, func() bool {
   322  		connections := 0
   323  		for _, id := range ids {
   324  			connected, err := mw.IsConnected(id.NodeID)
   325  			require.NoError(t, err)
   326  			if connected {
   327  				connections++
   328  			}
   329  		}
   330  		suite.logger.Debug().
   331  			Int("threshold", threshold).
   332  			Int("connections", connections).
   333  			Msg("current connection count")
   334  		return connections >= threshold
   335  	}, 5*time.Second, 100*time.Millisecond, "node is not connected to enough nodes")
   336  }
   337  
   338  // assertDisconnected checks that the middleware of a node is not connected to any of the other nodes specified in the
   339  // ids list
   340  func (suite *MutableIdentityTableSuite) assertDisconnected(mw network.Middleware, ids flow.IdentityList) {
   341  	t := suite.T()
   342  	require.Eventuallyf(t, func() bool {
   343  		for _, id := range ids {
   344  			connected, err := mw.IsConnected(id.NodeID)
   345  			require.NoError(t, err)
   346  			if connected {
   347  				return false
   348  			}
   349  		}
   350  		return true
   351  	}, 5*time.Second, 100*time.Millisecond, "node is still connected")
   352  }
   353  
   354  // assertNetworkPrimitives asserts that allowed engines can exchange messages between themselves but not with the
   355  // disallowed engines using each of the three network primitives
   356  func (suite *MutableIdentityTableSuite) assertNetworkPrimitives(
   357  	allowedIDs flow.IdentityList,
   358  	allowedEngs []*testutils.MeshEngine,
   359  	disallowedIDs flow.IdentityList,
   360  	disallowedEngs []*testutils.MeshEngine) {
   361  	suite.Run("Publish", func() {
   362  		suite.exchangeMessages(allowedIDs, allowedEngs, disallowedIDs, disallowedEngs, suite.Publish, false)
   363  	})
   364  	suite.Run("Multicast", func() {
   365  		suite.exchangeMessages(allowedIDs, allowedEngs, disallowedIDs, disallowedEngs, suite.Multicast, false)
   366  	})
   367  	suite.Run("Unicast", func() {
   368  		// unicast send from or to a node that has been evicted should fail with an error
   369  		suite.exchangeMessages(allowedIDs, allowedEngs, disallowedIDs, disallowedEngs, suite.Unicast, true)
   370  	})
   371  }
   372  
   373  // exchangeMessages verifies that allowed engines can successfully exchange messages between them while disallowed
   374  // engines can't using the ConduitSendWrapperFunc network primitive
   375  func (suite *MutableIdentityTableSuite) exchangeMessages(
   376  	allowedIDs flow.IdentityList,
   377  	allowedEngs []*testutils.MeshEngine,
   378  	disallowedIDs flow.IdentityList,
   379  	disallowedEngs []*testutils.MeshEngine,
   380  	send testutils.ConduitSendWrapperFunc,
   381  	expectSendErrorForDisallowedIDs bool) {
   382  
   383  	// send a message from each of the allowed engine to the other allowed engines
   384  	for i, allowedEng := range allowedEngs {
   385  
   386  		fromID := allowedIDs[i].NodeID
   387  		targetIDs := allowedIDs.Filter(filter.Not(filter.HasNodeID(allowedIDs[i].NodeID)))
   388  
   389  		err := suite.sendMessage(fromID, allowedEng, targetIDs, send)
   390  		require.NoError(suite.T(), err)
   391  	}
   392  
   393  	// send a message from each of the allowed engine to all of the disallowed engines
   394  	if len(disallowedEngs) > 0 {
   395  		for i, fromEng := range allowedEngs {
   396  
   397  			fromID := allowedIDs[i].NodeID
   398  			targetIDs := disallowedIDs
   399  
   400  			err := suite.sendMessage(fromID, fromEng, targetIDs, send)
   401  			if expectSendErrorForDisallowedIDs {
   402  				require.Error(suite.T(), err)
   403  			}
   404  		}
   405  	}
   406  
   407  	// send a message from each of the disallowed engine to each of the allowed engines
   408  	for i, fromEng := range disallowedEngs {
   409  
   410  		fromID := disallowedIDs[i].NodeID
   411  		targetIDs := allowedIDs
   412  
   413  		err := suite.sendMessage(fromID, fromEng, targetIDs, send)
   414  		if expectSendErrorForDisallowedIDs {
   415  			require.Error(suite.T(), err)
   416  		}
   417  	}
   418  
   419  	count := len(allowedEngs)
   420  	expectedMsgCnt := count - 1
   421  	wg := sync.WaitGroup{}
   422  	// fires a goroutine for each of the allowed engine to listen for incoming messages
   423  	for i := range allowedEngs {
   424  		wg.Add(expectedMsgCnt)
   425  		go func(e *testutils.MeshEngine) {
   426  			for x := 0; x < expectedMsgCnt; x++ {
   427  				<-e.Received
   428  				wg.Done()
   429  			}
   430  		}(allowedEngs[i])
   431  	}
   432  
   433  	// assert that all allowed engines received expectedMsgCnt number of messages
   434  	unittest.AssertReturnsBefore(suite.T(), wg.Wait, 5*time.Second)
   435  	// assert that all allowed engines received no other messages
   436  	for i := range allowedEngs {
   437  		assert.Empty(suite.T(), allowedEngs[i].Received)
   438  	}
   439  
   440  	// assert that the disallowed engines didn't receive any message
   441  	for i, eng := range disallowedEngs {
   442  		unittest.RequireNeverClosedWithin(suite.T(), eng.Received, time.Millisecond,
   443  			fmt.Sprintf("%s engine should not have recevied message", disallowedIDs[i]))
   444  	}
   445  }
   446  
   447  func (suite *MutableIdentityTableSuite) sendMessage(fromID flow.Identifier,
   448  	fromEngine *testutils.MeshEngine,
   449  	toIDs flow.IdentityList,
   450  	send testutils.ConduitSendWrapperFunc) error {
   451  
   452  	primitive := runtime.FuncForPC(reflect.ValueOf(send).Pointer()).Name()
   453  	event := &message.TestMessage{
   454  		Text: fmt.Sprintf("hello from node %s using %s", fromID.String(), primitive),
   455  	}
   456  
   457  	return send(event, fromEngine.Con, toIDs.NodeIDs()...)
   458  }