github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/gossip/simulation/network.go (about)

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package simulation
    12  
    13  import (
    14  	"context"
    15  	"crypto/tls"
    16  	"net"
    17  	"time"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/base"
    20  	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
    21  	"github.com/cockroachdb/cockroach/pkg/gossip"
    22  	"github.com/cockroachdb/cockroach/pkg/gossip/resolver"
    23  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    24  	"github.com/cockroachdb/cockroach/pkg/rpc"
    25  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    26  	"github.com/cockroachdb/cockroach/pkg/util"
    27  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    28  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    29  	"github.com/cockroachdb/cockroach/pkg/util/log"
    30  	"github.com/cockroachdb/cockroach/pkg/util/metric"
    31  	"github.com/cockroachdb/cockroach/pkg/util/netutil"
    32  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    33  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    34  	"github.com/cockroachdb/cockroach/pkg/util/uuid"
    35  	"google.golang.org/grpc"
    36  )
    37  
    38  // Node represents a node used in a Network. It includes information
    39  // about the node's gossip instance, network address, and underlying
    40  // server.
    41  type Node struct {
    42  	Gossip    *gossip.Gossip
    43  	Server    *grpc.Server
    44  	Listener  net.Listener
    45  	Registry  *metric.Registry
    46  	Resolvers []resolver.Resolver
    47  }
    48  
    49  // Addr returns the address of the connected listener.
    50  func (n *Node) Addr() net.Addr {
    51  	return n.Listener.Addr()
    52  }
    53  
    54  // Network provides access to a test gossip network of nodes.
    55  type Network struct {
    56  	Nodes           []*Node
    57  	Stopper         *stop.Stopper
    58  	RPCContext      *rpc.Context
    59  	nodeIDAllocator roachpb.NodeID // provides unique node IDs
    60  	tlsConfig       *tls.Config
    61  	started         bool
    62  }
    63  
    64  // NewNetwork creates nodeCount gossip nodes.
    65  func NewNetwork(
    66  	stopper *stop.Stopper, nodeCount int, createResolvers bool, defaultZoneConfig *zonepb.ZoneConfig,
    67  ) *Network {
    68  	log.Infof(context.TODO(), "simulating gossip network with %d nodes", nodeCount)
    69  
    70  	n := &Network{
    71  		Nodes:   []*Node{},
    72  		Stopper: stopper,
    73  	}
    74  	n.RPCContext = rpc.NewContext(
    75  		log.AmbientContext{Tracer: tracing.NewTracer()},
    76  		&base.Config{Insecure: true},
    77  		hlc.NewClock(hlc.UnixNano, time.Nanosecond),
    78  		n.Stopper,
    79  		cluster.MakeTestingClusterSettings(),
    80  	)
    81  	var err error
    82  	n.tlsConfig, err = n.RPCContext.GetServerTLSConfig()
    83  	if err != nil {
    84  		log.Fatalf(context.TODO(), "%v", err)
    85  	}
    86  
    87  	// Ensure that tests using this test context and restart/shut down
    88  	// their servers do not inadvertently start talking to servers from
    89  	// unrelated concurrent tests.
    90  	n.RPCContext.ClusterID.Set(context.TODO(), uuid.MakeV4())
    91  
    92  	for i := 0; i < nodeCount; i++ {
    93  		node, err := n.CreateNode(defaultZoneConfig)
    94  		if err != nil {
    95  			log.Fatalf(context.TODO(), "%v", err)
    96  		}
    97  		// Build a resolver for each instance or we'll get data races.
    98  		if createResolvers {
    99  			r, err := resolver.NewResolverFromAddress(n.Nodes[0].Addr())
   100  			if err != nil {
   101  				log.Fatalf(context.TODO(), "bad gossip address %s: %s", n.Nodes[0].Addr(), err)
   102  			}
   103  			node.Resolvers = []resolver.Resolver{r}
   104  		}
   105  	}
   106  	return n
   107  }
   108  
   109  // CreateNode creates a simulation node and starts an RPC server for it.
   110  func (n *Network) CreateNode(defaultZoneConfig *zonepb.ZoneConfig) (*Node, error) {
   111  	server := rpc.NewServer(n.RPCContext)
   112  	ln, err := net.Listen(util.IsolatedTestAddr.Network(), util.IsolatedTestAddr.String())
   113  	if err != nil {
   114  		return nil, err
   115  	}
   116  	node := &Node{Server: server, Listener: ln, Registry: metric.NewRegistry()}
   117  	node.Gossip = gossip.NewTest(0, n.RPCContext, server, n.Stopper, node.Registry, defaultZoneConfig)
   118  	n.Stopper.RunWorker(context.TODO(), func(context.Context) {
   119  		<-n.Stopper.ShouldQuiesce()
   120  		netutil.FatalIfUnexpected(ln.Close())
   121  		<-n.Stopper.ShouldStop()
   122  		server.Stop()
   123  		node.Gossip.EnableSimulationCycler(false)
   124  	})
   125  	n.Nodes = append(n.Nodes, node)
   126  	return node, nil
   127  }
   128  
   129  // StartNode initializes a gossip instance for the simulation node and
   130  // starts it.
   131  func (n *Network) StartNode(node *Node) error {
   132  	node.Gossip.Start(node.Addr(), node.Resolvers)
   133  	node.Gossip.EnableSimulationCycler(true)
   134  	n.nodeIDAllocator++
   135  	node.Gossip.NodeID.Set(context.TODO(), n.nodeIDAllocator)
   136  	if err := node.Gossip.SetNodeDescriptor(&roachpb.NodeDescriptor{
   137  		NodeID:  node.Gossip.NodeID.Get(),
   138  		Address: util.MakeUnresolvedAddr(node.Addr().Network(), node.Addr().String()),
   139  	}); err != nil {
   140  		return err
   141  	}
   142  	if err := node.Gossip.AddInfo(node.Addr().String(),
   143  		encoding.EncodeUint64Ascending(nil, 0), time.Hour); err != nil {
   144  		return err
   145  	}
   146  	n.Stopper.RunWorker(context.TODO(), func(context.Context) {
   147  		netutil.FatalIfUnexpected(node.Server.Serve(node.Listener))
   148  	})
   149  	return nil
   150  }
   151  
   152  // GetNodeFromID returns the simulation node associated with
   153  // provided node ID, or nil if there is no such node.
   154  func (n *Network) GetNodeFromID(nodeID roachpb.NodeID) (*Node, bool) {
   155  	for _, node := range n.Nodes {
   156  		if node.Gossip.NodeID.Get() == nodeID {
   157  			return node, true
   158  		}
   159  	}
   160  	return nil, false
   161  }
   162  
   163  // SimulateNetwork runs until the simCallback returns false.
   164  //
   165  // At each cycle, every node gossips a key equal to its address (unique)
   166  // with the cycle as the value. The received cycle value can be used
   167  // to determine the aging of information between any two nodes in the
   168  // network.
   169  //
   170  // At each cycle of the simulation, node 0 gossips the sentinel.
   171  //
   172  // The simulation callback receives the cycle and the network as arguments.
   173  func (n *Network) SimulateNetwork(simCallback func(cycle int, network *Network) bool) {
   174  	n.Start()
   175  	nodes := n.Nodes
   176  	for cycle := 1; ; cycle++ {
   177  		// Node 0 gossips sentinel & cluster ID every cycle.
   178  		if err := nodes[0].Gossip.AddInfo(
   179  			gossip.KeySentinel,
   180  			encoding.EncodeUint64Ascending(nil, uint64(cycle)),
   181  			time.Hour,
   182  		); err != nil {
   183  			log.Fatalf(context.TODO(), "%v", err)
   184  		}
   185  		if err := nodes[0].Gossip.AddInfo(
   186  			gossip.KeyClusterID,
   187  			encoding.EncodeUint64Ascending(nil, uint64(cycle)),
   188  			0*time.Second,
   189  		); err != nil {
   190  			log.Fatalf(context.TODO(), "%v", err)
   191  		}
   192  		// Every node gossips every cycle.
   193  		for _, node := range nodes {
   194  			if err := node.Gossip.AddInfo(
   195  				node.Addr().String(),
   196  				encoding.EncodeUint64Ascending(nil, uint64(cycle)),
   197  				time.Hour,
   198  			); err != nil {
   199  				log.Fatalf(context.TODO(), "%v", err)
   200  			}
   201  			node.Gossip.SimulationCycle()
   202  		}
   203  		// If the simCallback returns false, we're done with the
   204  		// simulation; exit the loop. This condition is tested here
   205  		// instead of in the for statement in order to guarantee
   206  		// we run at least one iteration of this loop in order to
   207  		// gossip the cluster ID and sentinel.
   208  		if !simCallback(cycle, n) {
   209  			break
   210  		}
   211  		time.Sleep(5 * time.Millisecond)
   212  	}
   213  	log.Infof(context.TODO(), "gossip network simulation: total infos sent=%d, received=%d", n.infosSent(), n.infosReceived())
   214  }
   215  
   216  // Start starts all gossip nodes.
   217  // TODO(spencer): make all methods in Network return errors instead of
   218  // fatal logging.
   219  func (n *Network) Start() {
   220  	if n.started {
   221  		return
   222  	}
   223  	n.started = true
   224  	for _, node := range n.Nodes {
   225  		if err := n.StartNode(node); err != nil {
   226  			log.Fatalf(context.TODO(), "%v", err)
   227  		}
   228  	}
   229  }
   230  
   231  // RunUntilFullyConnected blocks until the gossip network has received
   232  // gossip from every other node in the network. It returns the gossip
   233  // cycle at which the network became fully connected.
   234  func (n *Network) RunUntilFullyConnected() int {
   235  	var connectedAtCycle int
   236  	n.SimulateNetwork(func(cycle int, network *Network) bool {
   237  		if network.IsNetworkConnected() {
   238  			connectedAtCycle = cycle
   239  			return false
   240  		}
   241  		return true
   242  	})
   243  	return connectedAtCycle
   244  }
   245  
   246  // IsNetworkConnected returns true if the network is fully connected
   247  // with no partitions (i.e. every node knows every other node's
   248  // network address).
   249  func (n *Network) IsNetworkConnected() bool {
   250  	for _, leftNode := range n.Nodes {
   251  		for _, rightNode := range n.Nodes {
   252  			if _, err := leftNode.Gossip.GetInfo(gossip.MakeNodeIDKey(rightNode.Gossip.NodeID.Get())); err != nil {
   253  				return false
   254  			}
   255  		}
   256  	}
   257  	return true
   258  }
   259  
   260  // infosSent returns the total count of infos sent from all nodes in
   261  // the network.
   262  func (n *Network) infosSent() int {
   263  	var count int64
   264  	for _, node := range n.Nodes {
   265  		count += node.Gossip.GetNodeMetrics().InfosSent.Counter.Count()
   266  	}
   267  	return int(count)
   268  }
   269  
   270  // infosReceived returns the total count of infos received from all
   271  // nodes in the network.
   272  func (n *Network) infosReceived() int {
   273  	var count int64
   274  	for _, node := range n.Nodes {
   275  		count += node.Gossip.GetNodeMetrics().InfosReceived.Counter.Count()
   276  	}
   277  	return int(count)
   278  }