github.com/susy-go/susy-graviton@v0.0.0-20190614130430-36cddae42305/swarm/network/simulations/discovery/discovery_test.go (about)

     1  // Copyleft 2018 The susy-graviton Authors
     2  // This file is part of the susy-graviton library.
     3  //
     4  // The susy-graviton library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The susy-graviton library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the susy-graviton library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package discovery
    18  
    19  import (
    20  	"context"
    21  	"flag"
    22  	"fmt"
    23  	"io/ioutil"
    24  	"os"
    25  	"path"
    26  	"strings"
    27  	"testing"
    28  	"time"
    29  
    30  	"github.com/susy-go/susy-graviton/common"
    31  	"github.com/susy-go/susy-graviton/log"
    32  	"github.com/susy-go/susy-graviton/node"
    33  	"github.com/susy-go/susy-graviton/p2p"
    34  	"github.com/susy-go/susy-graviton/p2p/enode"
    35  	"github.com/susy-go/susy-graviton/p2p/simulations"
    36  	"github.com/susy-go/susy-graviton/p2p/simulations/adapters"
    37  	"github.com/susy-go/susy-graviton/swarm/network"
    38  	"github.com/susy-go/susy-graviton/swarm/state"
    39  	colorable "github.com/mattn/go-colorable"
    40  )
    41  
    42  // serviceName is used with the exec adapter so the exec'd binary knows which
    43  // service to execute
    44  const serviceName = "discovery"
    45  const testNeighbourhoodSize = 2
    46  const discoveryPersistenceDatadir = "discovery_persistence_test_store"
    47  
    48  var discoveryPersistencePath = path.Join(os.TempDir(), discoveryPersistenceDatadir)
    49  var discoveryEnabled = true
    50  var persistenceEnabled = false
    51  
    52  var services = adapters.Services{
    53  	serviceName: newService,
    54  }
    55  
    56  func cleanDbStores() error {
    57  	entries, err := ioutil.ReadDir(os.TempDir())
    58  	if err != nil {
    59  		return err
    60  	}
    61  
    62  	for _, f := range entries {
    63  		if strings.HasPrefix(f.Name(), discoveryPersistenceDatadir) {
    64  			os.RemoveAll(path.Join(os.TempDir(), f.Name()))
    65  		}
    66  	}
    67  	return nil
    68  
    69  }
    70  
    71  func getDbStore(nodeID string) (*state.DBStore, error) {
    72  	if _, err := os.Stat(discoveryPersistencePath + "_" + nodeID); os.IsNotExist(err) {
    73  		log.Info(fmt.Sprintf("directory for nodeID %s does not exist. creating...", nodeID))
    74  		ioutil.TempDir("", discoveryPersistencePath+"_"+nodeID)
    75  	}
    76  	log.Info(fmt.Sprintf("opening storage directory for nodeID %s", nodeID))
    77  	store, err := state.NewDBStore(discoveryPersistencePath + "_" + nodeID)
    78  	if err != nil {
    79  		return nil, err
    80  	}
    81  	return store, nil
    82  }
    83  
    84  var (
    85  	nodeCount = flag.Int("nodes", 32, "number of nodes to create (default 32)")
    86  	initCount = flag.Int("conns", 1, "number of originally connected peers	 (default 1)")
    87  	loglevel  = flag.Int("loglevel", 3, "verbosity of logs")
    88  	rawlog    = flag.Bool("rawlog", false, "remove terminal formatting from logs")
    89  )
    90  
    91  func init() {
    92  	flag.Parse()
    93  	// register the discovery service which will run as a devp2p
    94  	// protocol when using the exec adapter
    95  	adapters.RegisterServices(services)
    96  
    97  	log.PrintOrigins(true)
    98  	log.Root().SetHandler(log.LvlFilterHandler(log.Lvl(*loglevel), log.StreamHandler(colorable.NewColorableStderr(), log.TerminalFormat(!*rawlog))))
    99  }
   100  
   101  // Benchmarks to test the average time it takes for an N-node ring
   102  // to full a healthy kademlia topology
   103  func BenchmarkDiscovery_8_1(b *testing.B)   { benchmarkDiscovery(b, 8, 1) }
   104  func BenchmarkDiscovery_16_1(b *testing.B)  { benchmarkDiscovery(b, 16, 1) }
   105  func BenchmarkDiscovery_32_1(b *testing.B)  { benchmarkDiscovery(b, 32, 1) }
   106  func BenchmarkDiscovery_64_1(b *testing.B)  { benchmarkDiscovery(b, 64, 1) }
   107  func BenchmarkDiscovery_128_1(b *testing.B) { benchmarkDiscovery(b, 128, 1) }
   108  func BenchmarkDiscovery_256_1(b *testing.B) { benchmarkDiscovery(b, 256, 1) }
   109  
   110  func BenchmarkDiscovery_8_2(b *testing.B)   { benchmarkDiscovery(b, 8, 2) }
   111  func BenchmarkDiscovery_16_2(b *testing.B)  { benchmarkDiscovery(b, 16, 2) }
   112  func BenchmarkDiscovery_32_2(b *testing.B)  { benchmarkDiscovery(b, 32, 2) }
   113  func BenchmarkDiscovery_64_2(b *testing.B)  { benchmarkDiscovery(b, 64, 2) }
   114  func BenchmarkDiscovery_128_2(b *testing.B) { benchmarkDiscovery(b, 128, 2) }
   115  func BenchmarkDiscovery_256_2(b *testing.B) { benchmarkDiscovery(b, 256, 2) }
   116  
   117  func BenchmarkDiscovery_8_4(b *testing.B)   { benchmarkDiscovery(b, 8, 4) }
   118  func BenchmarkDiscovery_16_4(b *testing.B)  { benchmarkDiscovery(b, 16, 4) }
   119  func BenchmarkDiscovery_32_4(b *testing.B)  { benchmarkDiscovery(b, 32, 4) }
   120  func BenchmarkDiscovery_64_4(b *testing.B)  { benchmarkDiscovery(b, 64, 4) }
   121  func BenchmarkDiscovery_128_4(b *testing.B) { benchmarkDiscovery(b, 128, 4) }
   122  func BenchmarkDiscovery_256_4(b *testing.B) { benchmarkDiscovery(b, 256, 4) }
   123  
   124  func TestDiscoverySimulationExecAdapter(t *testing.T) {
   125  	testDiscoverySimulationExecAdapter(t, *nodeCount, *initCount)
   126  }
   127  
   128  func testDiscoverySimulationExecAdapter(t *testing.T, nodes, conns int) {
   129  	baseDir, err := ioutil.TempDir("", "swarm-test")
   130  	if err != nil {
   131  		t.Fatal(err)
   132  	}
   133  	defer os.RemoveAll(baseDir)
   134  	testDiscoverySimulation(t, nodes, conns, adapters.NewExecAdapter(baseDir))
   135  }
   136  
   137  func TestDiscoverySimulationSimAdapter(t *testing.T) {
   138  	testDiscoverySimulationSimAdapter(t, *nodeCount, *initCount)
   139  }
   140  
   141  func TestDiscoveryPersistenceSimulationSimAdapter(t *testing.T) {
   142  	testDiscoveryPersistenceSimulationSimAdapter(t, *nodeCount, *initCount)
   143  }
   144  
   145  func testDiscoveryPersistenceSimulationSimAdapter(t *testing.T, nodes, conns int) {
   146  	testDiscoveryPersistenceSimulation(t, nodes, conns, adapters.NewSimAdapter(services))
   147  }
   148  
   149  func testDiscoverySimulationSimAdapter(t *testing.T, nodes, conns int) {
   150  	testDiscoverySimulation(t, nodes, conns, adapters.NewSimAdapter(services))
   151  }
   152  
   153  func testDiscoverySimulation(t *testing.T, nodes, conns int, adapter adapters.NodeAdapter) {
   154  	startedAt := time.Now()
   155  	result, err := discoverySimulation(nodes, conns, adapter)
   156  	if err != nil {
   157  		t.Fatalf("Setting up simulation failed: %v", err)
   158  	}
   159  	if result.Error != nil {
   160  		t.Fatalf("Simulation failed: %s", result.Error)
   161  	}
   162  	t.Logf("Simulation with %d nodes passed in %s", nodes, result.FinishedAt.Sub(result.StartedAt))
   163  	var min, max time.Duration
   164  	var sum int
   165  	for _, pass := range result.Passes {
   166  		duration := pass.Sub(result.StartedAt)
   167  		if sum == 0 || duration < min {
   168  			min = duration
   169  		}
   170  		if duration > max {
   171  			max = duration
   172  		}
   173  		sum += int(duration.Nanoseconds())
   174  	}
   175  	t.Logf("Min: %s, Max: %s, Average: %s", min, max, time.Duration(sum/len(result.Passes))*time.Nanosecond)
   176  	finishedAt := time.Now()
   177  	t.Logf("Setup: %s, shutdown: %s", result.StartedAt.Sub(startedAt), finishedAt.Sub(result.FinishedAt))
   178  }
   179  
   180  func testDiscoveryPersistenceSimulation(t *testing.T, nodes, conns int, adapter adapters.NodeAdapter) map[int][]byte {
   181  	persistenceEnabled = true
   182  	discoveryEnabled = true
   183  
   184  	result, err := discoveryPersistenceSimulation(nodes, conns, adapter)
   185  
   186  	if err != nil {
   187  		t.Fatalf("Setting up simulation failed: %v", err)
   188  	}
   189  	if result.Error != nil {
   190  		t.Fatalf("Simulation failed: %s", result.Error)
   191  	}
   192  	t.Logf("Simulation with %d nodes passed in %s", nodes, result.FinishedAt.Sub(result.StartedAt))
   193  	// set the discovery and persistence flags again to default so other
   194  	// tests will not be affected
   195  	discoveryEnabled = true
   196  	persistenceEnabled = false
   197  	return nil
   198  }
   199  
   200  func benchmarkDiscovery(b *testing.B, nodes, conns int) {
   201  	for i := 0; i < b.N; i++ {
   202  		result, err := discoverySimulation(nodes, conns, adapters.NewSimAdapter(services))
   203  		if err != nil {
   204  			b.Fatalf("setting up simulation failed: %v", err)
   205  		}
   206  		if result.Error != nil {
   207  			b.Logf("simulation failed: %s", result.Error)
   208  		}
   209  	}
   210  }
   211  
   212  func discoverySimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simulations.StepResult, error) {
   213  	// create network
   214  	net := simulations.NewNetwork(adapter, &simulations.NetworkConfig{
   215  		ID:             "0",
   216  		DefaultService: serviceName,
   217  	})
   218  	defer net.Shutdown()
   219  	trigger := make(chan enode.ID)
   220  	ids := make([]enode.ID, nodes)
   221  	for i := 0; i < nodes; i++ {
   222  		conf := adapters.RandomNodeConfig()
   223  		node, err := net.NewNodeWithConfig(conf)
   224  		if err != nil {
   225  			return nil, fmt.Errorf("error starting node: %s", err)
   226  		}
   227  		if err := net.Start(node.ID()); err != nil {
   228  			return nil, fmt.Errorf("error starting node %s: %s", node.ID().TerminalString(), err)
   229  		}
   230  		if err := triggerChecks(trigger, net, node.ID()); err != nil {
   231  			return nil, fmt.Errorf("error triggering checks for node %s: %s", node.ID().TerminalString(), err)
   232  		}
   233  		ids[i] = node.ID()
   234  	}
   235  
   236  	// run a simulation which connects the 10 nodes in a ring and waits
   237  	// for full peer discovery
   238  	var addrs [][]byte
   239  	action := func(ctx context.Context) error {
   240  		return nil
   241  	}
   242  	for i := range ids {
   243  		// collect the overlay addresses, to
   244  		addrs = append(addrs, ids[i].Bytes())
   245  	}
   246  	err := net.ConnectNodesChain(nil)
   247  	if err != nil {
   248  		return nil, err
   249  	}
   250  	log.Debug(fmt.Sprintf("nodes: %v", len(addrs)))
   251  	// construct the peer pot, so that kademlia health can be checked
   252  	ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
   253  	check := func(ctx context.Context, id enode.ID) (bool, error) {
   254  		select {
   255  		case <-ctx.Done():
   256  			return false, ctx.Err()
   257  		default:
   258  		}
   259  
   260  		node := net.GetNode(id)
   261  		if node == nil {
   262  			return false, fmt.Errorf("unknown node: %s", id)
   263  		}
   264  		client, err := node.Client()
   265  		if err != nil {
   266  			return false, fmt.Errorf("error getting node client: %s", err)
   267  		}
   268  
   269  		healthy := &network.Health{}
   270  		if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
   271  			return false, fmt.Errorf("error getting node health: %s", err)
   272  		}
   273  		log.Debug(fmt.Sprintf("node %4s healthy: connected nearest neighbours: %v, know nearest neighbours: %v,\n\n%v", id, healthy.ConnectNN, healthy.KnowNN, healthy.Hive))
   274  		return healthy.KnowNN && healthy.ConnectNN, nil
   275  	}
   276  
   277  	// 64 nodes ~ 1min
   278  	// 128 nodes ~
   279  	timeout := 300 * time.Second
   280  	ctx, cancel := context.WithTimeout(context.Background(), timeout)
   281  	defer cancel()
   282  	result := simulations.NewSimulation(net).Run(ctx, &simulations.Step{
   283  		Action:  action,
   284  		Trigger: trigger,
   285  		Expect: &simulations.Expectation{
   286  			Nodes: ids,
   287  			Check: check,
   288  		},
   289  	})
   290  	if result.Error != nil {
   291  		return result, nil
   292  	}
   293  	return result, nil
   294  }
   295  
   296  func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simulations.StepResult, error) {
   297  	cleanDbStores()
   298  	defer cleanDbStores()
   299  
   300  	// create network
   301  	net := simulations.NewNetwork(adapter, &simulations.NetworkConfig{
   302  		ID:             "0",
   303  		DefaultService: serviceName,
   304  	})
   305  	defer net.Shutdown()
   306  	trigger := make(chan enode.ID)
   307  	ids := make([]enode.ID, nodes)
   308  	var addrs [][]byte
   309  
   310  	for i := 0; i < nodes; i++ {
   311  		conf := adapters.RandomNodeConfig()
   312  		node, err := net.NewNodeWithConfig(conf)
   313  		if err != nil {
   314  			panic(err)
   315  		}
   316  		if err != nil {
   317  			return nil, fmt.Errorf("error starting node: %s", err)
   318  		}
   319  		if err := net.Start(node.ID()); err != nil {
   320  			return nil, fmt.Errorf("error starting node %s: %s", node.ID().TerminalString(), err)
   321  		}
   322  		if err := triggerChecks(trigger, net, node.ID()); err != nil {
   323  			return nil, fmt.Errorf("error triggering checks for node %s: %s", node.ID().TerminalString(), err)
   324  		}
   325  		// TODO we shouldn't be equating underaddr and overaddr like this, as they are not the same in production
   326  		ids[i] = node.ID()
   327  		a := ids[i].Bytes()
   328  
   329  		addrs = append(addrs, a)
   330  	}
   331  
   332  	// run a simulation which connects the 10 nodes in a ring and waits
   333  	// for full peer discovery
   334  
   335  	var restartTime time.Time
   336  
   337  	action := func(ctx context.Context) error {
   338  		ticker := time.NewTicker(500 * time.Millisecond)
   339  
   340  		for range ticker.C {
   341  			isHealthy := true
   342  			for _, id := range ids {
   343  				//call Healthy RPC
   344  				node := net.GetNode(id)
   345  				if node == nil {
   346  					return fmt.Errorf("unknown node: %s", id)
   347  				}
   348  				client, err := node.Client()
   349  				if err != nil {
   350  					return fmt.Errorf("error getting node client: %s", err)
   351  				}
   352  				healthy := &network.Health{}
   353  				addr := id.String()
   354  				ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
   355  				if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
   356  					return fmt.Errorf("error getting node health: %s", err)
   357  				}
   358  
   359  				log.Info(fmt.Sprintf("NODE: %s, IS HEALTHY: %t", addr, healthy.ConnectNN && healthy.KnowNN && healthy.CountKnowNN > 0))
   360  				var nodeStr string
   361  				if err := client.Call(&nodeStr, "hive_string"); err != nil {
   362  					return fmt.Errorf("error getting node string %s", err)
   363  				}
   364  				log.Info(nodeStr)
   365  				if !healthy.ConnectNN || healthy.CountKnowNN == 0 {
   366  					isHealthy = false
   367  					break
   368  				}
   369  			}
   370  			if isHealthy {
   371  				break
   372  			}
   373  		}
   374  		ticker.Stop()
   375  
   376  		log.Info("reached healthy kademlia. starting to shutdown nodes.")
   377  		shutdownStarted := time.Now()
   378  		// stop all ids, then start them again
   379  		for _, id := range ids {
   380  			node := net.GetNode(id)
   381  
   382  			if err := net.Stop(node.ID()); err != nil {
   383  				return fmt.Errorf("error stopping node %s: %s", node.ID().TerminalString(), err)
   384  			}
   385  		}
   386  		log.Info(fmt.Sprintf("shutting down nodes took: %s", time.Since(shutdownStarted)))
   387  		persistenceEnabled = true
   388  		discoveryEnabled = false
   389  		restartTime = time.Now()
   390  		for _, id := range ids {
   391  			node := net.GetNode(id)
   392  			if err := net.Start(node.ID()); err != nil {
   393  				return fmt.Errorf("error starting node %s: %s", node.ID().TerminalString(), err)
   394  			}
   395  			if err := triggerChecks(trigger, net, node.ID()); err != nil {
   396  				return fmt.Errorf("error triggering checks for node %s: %s", node.ID().TerminalString(), err)
   397  			}
   398  		}
   399  
   400  		log.Info(fmt.Sprintf("restarting nodes took: %s", time.Since(restartTime)))
   401  
   402  		return nil
   403  	}
   404  	net.ConnectNodesChain(nil)
   405  	log.Debug(fmt.Sprintf("nodes: %v", len(addrs)))
   406  	// construct the peer pot, so that kademlia health can be checked
   407  	check := func(ctx context.Context, id enode.ID) (bool, error) {
   408  		select {
   409  		case <-ctx.Done():
   410  			return false, ctx.Err()
   411  		default:
   412  		}
   413  
   414  		node := net.GetNode(id)
   415  		if node == nil {
   416  			return false, fmt.Errorf("unknown node: %s", id)
   417  		}
   418  		client, err := node.Client()
   419  		if err != nil {
   420  			return false, fmt.Errorf("error getting node client: %s", err)
   421  		}
   422  		healthy := &network.Health{}
   423  		ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
   424  
   425  		if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
   426  			return false, fmt.Errorf("error getting node health: %s", err)
   427  		}
   428  		log.Info(fmt.Sprintf("node %4s healthy: got nearest neighbours: %v, know nearest neighbours: %v", id, healthy.ConnectNN, healthy.KnowNN))
   429  
   430  		return healthy.KnowNN && healthy.ConnectNN, nil
   431  	}
   432  
   433  	// 64 nodes ~ 1min
   434  	// 128 nodes ~
   435  	timeout := 300 * time.Second
   436  	ctx, cancel := context.WithTimeout(context.Background(), timeout)
   437  	defer cancel()
   438  	result := simulations.NewSimulation(net).Run(ctx, &simulations.Step{
   439  		Action:  action,
   440  		Trigger: trigger,
   441  		Expect: &simulations.Expectation{
   442  			Nodes: ids,
   443  			Check: check,
   444  		},
   445  	})
   446  	if result.Error != nil {
   447  		return result, nil
   448  	}
   449  
   450  	return result, nil
   451  }
   452  
   453  // triggerChecks triggers a simulation step check whenever a peer is added or
   454  // removed from the given node, and also every second to avoid a race between
   455  // peer events and kademlia becoming healthy
   456  func triggerChecks(trigger chan enode.ID, net *simulations.Network, id enode.ID) error {
   457  	node := net.GetNode(id)
   458  	if node == nil {
   459  		return fmt.Errorf("unknown node: %s", id)
   460  	}
   461  	client, err := node.Client()
   462  	if err != nil {
   463  		return err
   464  	}
   465  	events := make(chan *p2p.PeerEvent)
   466  	sub, err := client.Subscribe(context.Background(), "admin", events, "peerEvents")
   467  	if err != nil {
   468  		return fmt.Errorf("error getting peer events for node %v: %s", id, err)
   469  	}
   470  	go func() {
   471  		defer sub.Unsubscribe()
   472  
   473  		tick := time.NewTicker(time.Second)
   474  		defer tick.Stop()
   475  
   476  		for {
   477  			select {
   478  			case <-events:
   479  				trigger <- id
   480  			case <-tick.C:
   481  				trigger <- id
   482  			case err := <-sub.Err():
   483  				if err != nil {
   484  					log.Error(fmt.Sprintf("error getting peer events for node %v", id), "err", err)
   485  				}
   486  				return
   487  			}
   488  		}
   489  	}()
   490  	return nil
   491  }
   492  
   493  func newService(ctx *adapters.ServiceContext) (node.Service, error) {
   494  	addr := network.NewAddr(ctx.Config.Node())
   495  
   496  	kp := network.NewKadParams()
   497  	kp.NeighbourhoodSize = testNeighbourhoodSize
   498  
   499  	if ctx.Config.Reachable != nil {
   500  		kp.Reachable = func(o *network.BzzAddr) bool {
   501  			return ctx.Config.Reachable(o.ID())
   502  		}
   503  	}
   504  	kad := network.NewKademlia(addr.Over(), kp)
   505  	hp := network.NewHiveParams()
   506  	hp.KeepAliveInterval = time.Duration(200) * time.Millisecond
   507  	hp.Discovery = discoveryEnabled
   508  
   509  	log.Info(fmt.Sprintf("discovery for nodeID %s is %t", ctx.Config.ID.String(), hp.Discovery))
   510  
   511  	config := &network.BzzConfig{
   512  		OverlayAddr:  addr.Over(),
   513  		UnderlayAddr: addr.Under(),
   514  		HiveParams:   hp,
   515  	}
   516  
   517  	if persistenceEnabled {
   518  		log.Info(fmt.Sprintf("persistence enabled for nodeID %s", ctx.Config.ID.String()))
   519  		store, err := getDbStore(ctx.Config.ID.String())
   520  		if err != nil {
   521  			return nil, err
   522  		}
   523  		return network.NewBzz(config, kad, store, nil, nil), nil
   524  	}
   525  
   526  	return network.NewBzz(config, kad, nil, nil, nil), nil
   527  }