github.com/jincm/wesharechain@v0.0.0-20210122032815-1537409ce26a/chain/swarm/network/simulations/discovery/discovery_test.go

// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package discovery

import (
	"context"
	"flag"
	"fmt"
	"io/ioutil"
	"os"
	"path"
	"strings"
	"testing"
	"time"

	"github.com/ethereum/go-ethereum/swarm/testutil"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/node"
	"github.com/ethereum/go-ethereum/p2p"
	"github.com/ethereum/go-ethereum/p2p/enode"
	"github.com/ethereum/go-ethereum/p2p/simulations"
	"github.com/ethereum/go-ethereum/p2p/simulations/adapters"
	"github.com/ethereum/go-ethereum/swarm/network"
	"github.com/ethereum/go-ethereum/swarm/state"
	colorable "github.com/mattn/go-colorable"
)

// serviceName is used with the exec adapter so the exec'd binary knows which
// service to execute
const serviceName = "discovery"
const testNeighbourhoodSize = 2
const discoveryPersistenceDatadir = "discovery_persistence_test_store"

var discoveryPersistencePath = path.Join(os.TempDir(), discoveryPersistenceDatadir)
var discoveryEnabled = true
var persistenceEnabled = false

var services = adapters.Services{
	serviceName: newService,
}

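// cleanDbStores removes any leftover persistence directories that earlier
// test runs may have left in the temporary directory.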
func cleanDbStores() error {
	entries, err := ioutil.ReadDir(os.TempDir())
	if err != nil {
		return err
	}

	for _, f := range entries {
		if strings.HasPrefix(f.Name(), discoveryPersistenceDatadir) {
			os.RemoveAll(path.Join(os.TempDir(), f.Name()))
		}
	}
	return nil
}

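// getDbStore opens (and, if necessary, creates) the on-disk state store used
// to persist a node's kademlia between restarts.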
func getDbStore(nodeID string) (*state.DBStore, error) {
	if _, err := os.Stat(discoveryPersistencePath + "_" + nodeID); os.IsNotExist(err) {
		log.Info(fmt.Sprintf("directory for nodeID %s does not exist. creating...", nodeID))
		if err := os.MkdirAll(discoveryPersistencePath+"_"+nodeID, 0700); err != nil {
			return nil, err
		}
	}
	log.Info(fmt.Sprintf("opening storage directory for nodeID %s", nodeID))
	store, err := state.NewDBStore(discoveryPersistencePath + "_" + nodeID)
	if err != nil {
		return nil, err
	}
	return store, nil
}

var (
	nodeCount = flag.Int("nodes", defaultNodeCount(), "number of nodes to create")
	initCount = flag.Int("conns", 1, "number of initially connected peers")
	loglevel  = flag.Int("loglevel", 3, "verbosity of logs")
	rawlog    = flag.Bool("rawlog", false, "remove terminal formatting from logs")
)

func defaultNodeCount() int {
	if testutil.RaceEnabled {
		return 8
	}
	return 32
}

func init() {
	flag.Parse()
	// register the discovery service which will run as a devp2p
	// protocol when using the exec adapter
	adapters.RegisterServices(services)

	log.PrintOrigins(true)
	log.Root().SetHandler(log.LvlFilterHandler(log.Lvl(*loglevel), log.StreamHandler(colorable.NewColorableStderr(), log.TerminalFormat(!*rawlog))))
}

// Benchmarks to test the average time it takes for an N-node chain
// to form a healthy kademlia topology
func BenchmarkDiscovery_8_1(b *testing.B)   { benchmarkDiscovery(b, 8, 1) }
func BenchmarkDiscovery_16_1(b *testing.B)  { benchmarkDiscovery(b, 16, 1) }
func BenchmarkDiscovery_32_1(b *testing.B)  { benchmarkDiscovery(b, 32, 1) }
func BenchmarkDiscovery_64_1(b *testing.B)  { benchmarkDiscovery(b, 64, 1) }
func BenchmarkDiscovery_128_1(b *testing.B) { benchmarkDiscovery(b, 128, 1) }
func BenchmarkDiscovery_256_1(b *testing.B) { benchmarkDiscovery(b, 256, 1) }

func BenchmarkDiscovery_8_2(b *testing.B)   { benchmarkDiscovery(b, 8, 2) }
func BenchmarkDiscovery_16_2(b *testing.B)  { benchmarkDiscovery(b, 16, 2) }
func BenchmarkDiscovery_32_2(b *testing.B)  { benchmarkDiscovery(b, 32, 2) }
func BenchmarkDiscovery_64_2(b *testing.B)  { benchmarkDiscovery(b, 64, 2) }
func BenchmarkDiscovery_128_2(b *testing.B) { benchmarkDiscovery(b, 128, 2) }
func BenchmarkDiscovery_256_2(b *testing.B) { benchmarkDiscovery(b, 256, 2) }

func BenchmarkDiscovery_8_4(b *testing.B)   { benchmarkDiscovery(b, 8, 4) }
func BenchmarkDiscovery_16_4(b *testing.B)  { benchmarkDiscovery(b, 16, 4) }
func BenchmarkDiscovery_32_4(b *testing.B)  { benchmarkDiscovery(b, 32, 4) }
func BenchmarkDiscovery_64_4(b *testing.B)  { benchmarkDiscovery(b, 64, 4) }
func BenchmarkDiscovery_128_4(b *testing.B) { benchmarkDiscovery(b, 128, 4) }
func BenchmarkDiscovery_256_4(b *testing.B) { benchmarkDiscovery(b, 256, 4) }

func TestDiscoverySimulationExecAdapter(t *testing.T) {
	testDiscoverySimulationExecAdapter(t, *nodeCount, *initCount)
}

func testDiscoverySimulationExecAdapter(t *testing.T, nodes, conns int) {
	baseDir, err := ioutil.TempDir("", "swarm-test")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(baseDir)
	testDiscoverySimulation(t, nodes, conns, adapters.NewExecAdapter(baseDir))
}

func TestDiscoverySimulationSimAdapter(t *testing.T) {
	testDiscoverySimulationSimAdapter(t, *nodeCount, *initCount)
}

func TestDiscoveryPersistenceSimulationSimAdapter(t *testing.T) {
	testDiscoveryPersistenceSimulationSimAdapter(t, *nodeCount, *initCount)
}

func testDiscoveryPersistenceSimulationSimAdapter(t *testing.T, nodes, conns int) {
	testDiscoveryPersistenceSimulation(t, nodes, conns, adapters.NewSimAdapter(services))
}

func testDiscoverySimulationSimAdapter(t *testing.T, nodes, conns int) {
	testDiscoverySimulation(t, nodes, conns, adapters.NewSimAdapter(services))
}

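// testDiscoverySimulation runs the discovery simulation with the given adapter
// and logs how long it took the network to reach a healthy kademlia.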
func testDiscoverySimulation(t *testing.T, nodes, conns int, adapter adapters.NodeAdapter) {
	startedAt := time.Now()
	result, err := discoverySimulation(nodes, conns, adapter)
	if err != nil {
		t.Fatalf("Setting up simulation failed: %v", err)
	}
	if result.Error != nil {
		t.Fatalf("Simulation failed: %s", result.Error)
	}
	t.Logf("Simulation with %d nodes passed in %s", nodes, result.FinishedAt.Sub(result.StartedAt))
	var min, max time.Duration
	var sum int
	for _, pass := range result.Passes {
		duration := pass.Sub(result.StartedAt)
		if sum == 0 || duration < min {
			min = duration
		}
		if duration > max {
			max = duration
		}
		sum += int(duration.Nanoseconds())
	}
	t.Logf("Min: %s, Max: %s, Average: %s", min, max, time.Duration(sum/len(result.Passes))*time.Nanosecond)
	finishedAt := time.Now()
	t.Logf("Setup: %s, shutdown: %s", result.StartedAt.Sub(startedAt), finishedAt.Sub(result.FinishedAt))
}

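// testDiscoveryPersistenceSimulation enables persistence, runs the persistence
// simulation and then restores the package-level flags so that other tests are
// not affected.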
func testDiscoveryPersistenceSimulation(t *testing.T, nodes, conns int, adapter adapters.NodeAdapter) map[int][]byte {
	persistenceEnabled = true
	discoveryEnabled = true

	result, err := discoveryPersistenceSimulation(nodes, conns, adapter)

	if err != nil {
		t.Fatalf("Setting up simulation failed: %v", err)
	}
	if result.Error != nil {
		t.Fatalf("Simulation failed: %s", result.Error)
	}
	t.Logf("Simulation with %d nodes passed in %s", nodes, result.FinishedAt.Sub(result.StartedAt))
	// set the discovery and persistence flags again to default so other
	// tests will not be affected
	discoveryEnabled = true
	persistenceEnabled = false
	return nil
}

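// benchmarkDiscovery runs the discovery simulation b.N times with the given
// node and connection counts, failing the benchmark if setup fails and
// logging any simulation error.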
func benchmarkDiscovery(b *testing.B, nodes, conns int) {
	for i := 0; i < b.N; i++ {
		result, err := discoverySimulation(nodes, conns, adapters.NewSimAdapter(services))
		if err != nil {
			b.Fatalf("setting up simulation failed: %v", err)
		}
		if result.Error != nil {
			b.Logf("simulation failed: %s", result.Error)
		}
	}
}

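// discoverySimulation starts the given number of nodes, connects them in a
// chain and runs a simulation step that waits until every node reports a
// healthy kademlia, i.e. it both knows and is connected to its nearest
// neighbours.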
func discoverySimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simulations.StepResult, error) {
	// create network
	net := simulations.NewNetwork(adapter, &simulations.NetworkConfig{
		ID:             "0",
		DefaultService: serviceName,
	})
	defer net.Shutdown()
	trigger := make(chan enode.ID)
	ids := make([]enode.ID, nodes)
	for i := 0; i < nodes; i++ {
		conf := adapters.RandomNodeConfig()
		node, err := net.NewNodeWithConfig(conf)
		if err != nil {
			return nil, fmt.Errorf("error starting node: %s", err)
		}
		if err := net.Start(node.ID()); err != nil {
			return nil, fmt.Errorf("error starting node %s: %s", node.ID().TerminalString(), err)
		}
		if err := triggerChecks(trigger, net, node.ID()); err != nil {
			return nil, fmt.Errorf("error triggering checks for node %s: %s", node.ID().TerminalString(), err)
		}
		ids[i] = node.ID()
	}

	// run a simulation which connects the nodes in a chain and waits
	// for full peer discovery
	var addrs [][]byte
	action := func(ctx context.Context) error {
		return nil
	}
	for i := range ids {
		// collect the overlay addresses (here equal to the node IDs) for the peer pot
		addrs = append(addrs, ids[i].Bytes())
	}
	err := net.ConnectNodesChain(nil)
	if err != nil {
		return nil, err
	}
	log.Debug(fmt.Sprintf("nodes: %v", len(addrs)))
	// construct the peer pot, so that kademlia health can be checked
	ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
	check := func(ctx context.Context, id enode.ID) (bool, error) {
		select {
		case <-ctx.Done():
			return false, ctx.Err()
		default:
		}

		node := net.GetNode(id)
		if node == nil {
			return false, fmt.Errorf("unknown node: %s", id)
		}
		client, err := node.Client()
		if err != nil {
			return false, fmt.Errorf("error getting node client: %s", err)
		}

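		// ask the node's hive whether it is healthy with respect to the
		// expected peer pot entry for its overlay address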
		healthy := &network.Health{}
		if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
			return false, fmt.Errorf("error getting node health: %s", err)
		}
		log.Debug(fmt.Sprintf("node %4s healthy: connected nearest neighbours: %v, know nearest neighbours: %v,\n\n%v", id, healthy.ConnectNN, healthy.KnowNN, healthy.Hive))
		return healthy.KnowNN && healthy.ConnectNN, nil
	}

	// 64 nodes ~ 1min
	// 128 nodes ~
	timeout := 300 * time.Second
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	result := simulations.NewSimulation(net).Run(ctx, &simulations.Step{
		Action:  action,
		Trigger: trigger,
		Expect: &simulations.Expectation{
			Nodes: ids,
			Check: check,
		},
	})
	return result, nil
}

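// discoveryPersistenceSimulation is like discoverySimulation, but additionally
// verifies that a node's kademlia can be rebuilt from its persisted state: once
// the network is healthy, all nodes are stopped and restarted with discovery
// disabled, so health can only be regained from the stored peer addresses.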
func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simulations.StepResult, error) {
	cleanDbStores()
	defer cleanDbStores()

	// create network
	net := simulations.NewNetwork(adapter, &simulations.NetworkConfig{
		ID:             "0",
		DefaultService: serviceName,
	})
	defer net.Shutdown()
	trigger := make(chan enode.ID)
	ids := make([]enode.ID, nodes)
	var addrs [][]byte

	for i := 0; i < nodes; i++ {
		conf := adapters.RandomNodeConfig()
		node, err := net.NewNodeWithConfig(conf)
		if err != nil {
			return nil, fmt.Errorf("error starting node: %s", err)
		}
		if err := net.Start(node.ID()); err != nil {
			return nil, fmt.Errorf("error starting node %s: %s", node.ID().TerminalString(), err)
		}
		if err := triggerChecks(trigger, net, node.ID()); err != nil {
			return nil, fmt.Errorf("error triggering checks for node %s: %s", node.ID().TerminalString(), err)
		}
		// TODO we shouldn't be equating underaddr and overaddr like this, as they are not the same in production
		ids[i] = node.ID()
		a := ids[i].Bytes()

		addrs = append(addrs, a)
	}

	// run a simulation which connects the nodes in a chain and waits
	// for full peer discovery

	var restartTime time.Time

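	// the action below polls every node until the whole network reports a
	// healthy kademlia, then stops all nodes and restarts them with
	// persistence enabled and discovery disabled, so that the final health
	// check can only succeed using the peer state restored from disk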
	action := func(ctx context.Context) error {
		ticker := time.NewTicker(500 * time.Millisecond)

		for range ticker.C {
			isHealthy := true
			for _, id := range ids {
				//call Healthy RPC
				node := net.GetNode(id)
				if node == nil {
					return fmt.Errorf("unknown node: %s", id)
				}
				client, err := node.Client()
				if err != nil {
					return fmt.Errorf("error getting node client: %s", err)
				}
				healthy := &network.Health{}
				addr := id.String()
				ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
				if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
					return fmt.Errorf("error getting node health: %s", err)
				}

				log.Info(fmt.Sprintf("NODE: %s, IS HEALTHY: %t", addr, healthy.ConnectNN && healthy.KnowNN && healthy.CountKnowNN > 0))
				var nodeStr string
				if err := client.Call(&nodeStr, "hive_string"); err != nil {
					return fmt.Errorf("error getting node string: %s", err)
				}
				log.Info(nodeStr)
				if !healthy.ConnectNN || healthy.CountKnowNN == 0 {
					isHealthy = false
					break
				}
			}
			if isHealthy {
				break
			}
		}
		ticker.Stop()

		log.Info("reached healthy kademlia. starting to shutdown nodes.")
		shutdownStarted := time.Now()
		// stop all ids, then start them again
		for _, id := range ids {
			node := net.GetNode(id)

			if err := net.Stop(node.ID()); err != nil {
				return fmt.Errorf("error stopping node %s: %s", node.ID().TerminalString(), err)
			}
		}
		log.Info(fmt.Sprintf("shutting down nodes took: %s", time.Since(shutdownStarted)))
		persistenceEnabled = true
		discoveryEnabled = false
		restartTime = time.Now()
		for _, id := range ids {
			node := net.GetNode(id)
			if err := net.Start(node.ID()); err != nil {
				return fmt.Errorf("error starting node %s: %s", node.ID().TerminalString(), err)
			}
			if err := triggerChecks(trigger, net, node.ID()); err != nil {
				return fmt.Errorf("error triggering checks for node %s: %s", node.ID().TerminalString(), err)
			}
		}

		log.Info(fmt.Sprintf("restarting nodes took: %s", time.Since(restartTime)))

		return nil
	}
	if err := net.ConnectNodesChain(nil); err != nil {
		return nil, err
	}
	log.Debug(fmt.Sprintf("nodes: %v", len(addrs)))
	// construct the peer pot, so that kademlia health can be checked
	check := func(ctx context.Context, id enode.ID) (bool, error) {
		select {
		case <-ctx.Done():
			return false, ctx.Err()
		default:
		}

		node := net.GetNode(id)
		if node == nil {
			return false, fmt.Errorf("unknown node: %s", id)
		}
		client, err := node.Client()
		if err != nil {
			return false, fmt.Errorf("error getting node client: %s", err)
		}
		healthy := &network.Health{}
		ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)

		if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
			return false, fmt.Errorf("error getting node health: %s", err)
		}
		log.Info(fmt.Sprintf("node %4s healthy: got nearest neighbours: %v, know nearest neighbours: %v", id, healthy.ConnectNN, healthy.KnowNN))

		return healthy.KnowNN && healthy.ConnectNN, nil
	}

	// 64 nodes ~ 1min
	// 128 nodes ~
	timeout := 300 * time.Second
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	result := simulations.NewSimulation(net).Run(ctx, &simulations.Step{
		Action:  action,
		Trigger: trigger,
		Expect: &simulations.Expectation{
			Nodes: ids,
			Check: check,
		},
	})
	return result, nil
}

// triggerChecks triggers a simulation step check whenever a peer is added or
// removed from the given node, and also every second to avoid a race between
// peer events and kademlia becoming healthy
func triggerChecks(trigger chan enode.ID, net *simulations.Network, id enode.ID) error {
	node := net.GetNode(id)
	if node == nil {
		return fmt.Errorf("unknown node: %s", id)
	}
	client, err := node.Client()
	if err != nil {
		return err
	}
	events := make(chan *p2p.PeerEvent)
	sub, err := client.Subscribe(context.Background(), "admin", events, "peerEvents")
	if err != nil {
		return fmt.Errorf("error getting peer events for node %v: %s", id, err)
	}
	go func() {
		defer sub.Unsubscribe()

		tick := time.NewTicker(time.Second)
		defer tick.Stop()

		for {
			select {
			case <-events:
				trigger <- id
			case <-tick.C:
				trigger <- id
			case err := <-sub.Err():
				if err != nil {
					log.Error(fmt.Sprintf("error getting peer events for node %v", id), "err", err)
				}
				return
			}
		}
	}()
	return nil
}

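// newService constructs the swarm bzz/hive service under test, honouring the
// package-level discoveryEnabled and persistenceEnabled flags.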
func newService(ctx *adapters.ServiceContext) (node.Service, error) {
	addr := network.NewAddr(ctx.Config.Node())

	kp := network.NewKadParams()
	kp.NeighbourhoodSize = testNeighbourhoodSize

	if ctx.Config.Reachable != nil {
		kp.Reachable = func(o *network.BzzAddr) bool {
			return ctx.Config.Reachable(o.ID())
		}
	}
	kad := network.NewKademlia(addr.Over(), kp)
	hp := network.NewHiveParams()
	hp.KeepAliveInterval = time.Duration(200) * time.Millisecond
	hp.Discovery = discoveryEnabled

	log.Info(fmt.Sprintf("discovery for nodeID %s is %t", ctx.Config.ID.String(), hp.Discovery))

	config := &network.BzzConfig{
		OverlayAddr:  addr.Over(),
		UnderlayAddr: addr.Under(),
		HiveParams:   hp,
	}

	if persistenceEnabled {
		log.Info(fmt.Sprintf("persistence enabled for nodeID %s", ctx.Config.ID.String()))
		store, err := getDbStore(ctx.Config.ID.String())
		if err != nil {
			return nil, err
		}
		return network.NewBzz(config, kad, store, nil, nil), nil
	}

	return network.NewBzz(config, kad, nil, nil, nil), nil
}