github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/gossipsim/main.go (about)

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  /*
Command gossipsim provides tools meant to visualize or test aspects
of a Cockroach cluster on a single host.
    14  
    15  Gossip
    16  
Gossip creates a gossip network of up to 250 nodes and outputs
successive visualizations of the gossip network graph via dot.
    19  
    20  Uses tcp sockets for connecting 3, 10, 25, 50, 100 or 250
    21  nodes. Generates .dot graph output files for each cycle of the
    22  simulation.
    23  
    24  To run:
    25  
    go install github.com/cockroachdb/cockroach/pkg/cmd/gossipsim
    gossipsim -size=(tiny|small|medium|large|huge|ginormous)
    28  
    29  Log output includes instructions for displaying the graph output as a
    30  series of images to visualize the evolution of the network.
    31  
    32  Running the large through ginormous simulations will require the open
    33  files limit be increased either for the shell running the simulation,
    34  or system wide. For Linux:
    35  
    36      # For the current shell:
    37      ulimit -n 65536
    38  
    39      # System-wide:
    40      sysctl fs.file-max
    41      fs.file-max = 50384
    42  
    43  For MacOS:
    44  
    45      # To view current limits (soft / hard):
    46      launchctl limit maxfiles
    47  
    48      # To edit, add/edit the following line in /etc/launchd.conf and
    49      # restart for the new file limit to take effect.
    50      #
    51      # limit maxfiles 16384 32768
    52      sudo vi /etc/launchd.conf
    53  */
    54  package main
    55  
    56  import (
    57  	"context"
    58  	"flag"
    59  	"fmt"
    60  	"io/ioutil"
    61  	"math"
    62  	"os"
    63  	"strconv"
    64  	"strings"
    65  
    66  	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
    67  	"github.com/cockroachdb/cockroach/pkg/gossip"
    68  	"github.com/cockroachdb/cockroach/pkg/gossip/simulation"
    69  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    70  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    71  	"github.com/cockroachdb/cockroach/pkg/util/log"
    72  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    73  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    74  )
    75  
    76  const (
    77  	// minDotFontSize is the minimum font size for scaling node sizes
    78  	// proportional to the number of incoming connections.
    79  	minDotFontSize = 12
    80  	// maxDotFontSize is the maximum font size for scaling node sizes.
    81  	maxDotFontSize = 24
    82  )
    83  
    84  var (
    85  	size = flag.String("size", "medium", "size of network (tiny|small|medium|large|huge|ginormous)")
    86  )
    87  
    88  // edge is a helper struct which describes an edge in the dot output graph.
    89  type edge struct {
    90  	dest    roachpb.NodeID // Node ID of destination
    91  	added   bool           // True if edge was recently added
    92  	deleted bool           // True if edge was recently deleted
    93  }
    94  
    95  // edgeMap is a map from node address to a list of edges. A helper
    96  // method is provided to simplify adding edges.
    97  type edgeMap map[roachpb.NodeID][]edge
    98  
    99  // addEdge creates a list of edges if one doesn't yet exist for the
   100  // specified node ID.
   101  func (em edgeMap) addEdge(nodeID roachpb.NodeID, e edge) {
   102  	if _, ok := em[nodeID]; !ok {
   103  		em[nodeID] = make([]edge, 0, 1)
   104  	}
   105  	em[nodeID] = append(em[nodeID], e)
   106  }
   107  
   108  // outputDotFile generates a .dot file describing the current state of
   109  // the gossip network. nodes is a map from network address to gossip
   110  // node. edgeSet is empty on the first invocation, but
   111  // its content is set to encompass the entire set of edges in the
   112  // network when this method returns. It should be resupplied with each
   113  // successive invocation, as it is used to determine which edges are
   114  // new and which have been deleted and show those changes visually in
   115  // the output graph. New edges are drawn green; edges which were
   116  // removed over the course of the last simulation step(s) are drawn in
   117  // a lightly-dashed red.
   118  //
   119  // The format of the output looks like this:
   120  //
   121  //   digraph G {
   122  //   node [shape=record];
   123  //        node1 [fontsize=12,label="{Node 1|MH=3}"]
   124  //        node1 -> node3 [color=green]
   125  //        node1 -> node4
   126  //        node1 -> node5 [color=red,style=dotted]
   127  //        node2 [fontsize=24,label="{Node 2|MH=2}"]
   128  //        node2 -> node5
   129  //        node3 [fontsize=18,label="{Node 3|MH=5}"]
   130  //        node3 -> node5
   131  //        node3 -> node4
   132  //        node4 [fontsize=24,label="{Node 4|MH=4}"]
   133  //        node4 -> node2
   134  //        node5 [fontsize=24,label="{Node 5|MH=1}"]
   135  //        node5 -> node2
   136  //        node5 -> node3
   137  //   }
   138  //
   139  // Returns the name of the output file and a boolean for whether or not
   140  // the network has quiesced (that is, no new edges, and all nodes are
   141  // connected).
   142  func outputDotFile(
   143  	dotFN string, cycle int, network *simulation.Network, edgeSet map[string]edge,
   144  ) (string, bool) {
   145  	f, err := os.Create(dotFN)
   146  	if err != nil {
   147  		log.Fatalf(context.TODO(), "unable to create temp file: %s", err)
   148  	}
   149  	defer f.Close()
   150  
   151  	// Determine maximum number of incoming connections. Create outgoing
   152  	// edges, keeping track of which are new since last time (added=true).
   153  	outgoingMap := make(edgeMap)
   154  	var maxIncoming int
   155  	quiescent := true
   156  	// The order the graph file is written influences the arrangement
   157  	// of nodes in the output image, so it makes sense to eliminate
   158  	// randomness here. Unfortunately with graphviz it's fairly hard
   159  	// to get a consistent ordering.
   160  	for _, simNode := range network.Nodes {
   161  		node := simNode.Gossip
   162  		incoming := node.Incoming()
   163  		for _, iNode := range incoming {
   164  			e := edge{dest: node.NodeID.Get()}
   165  			key := fmt.Sprintf("%d:%d", iNode, node.NodeID.Get())
   166  			if _, ok := edgeSet[key]; !ok {
   167  				e.added = true
   168  				quiescent = false
   169  			}
   170  			delete(edgeSet, key)
   171  			outgoingMap.addEdge(iNode, e)
   172  		}
   173  		if len(incoming) > maxIncoming {
   174  			maxIncoming = len(incoming)
   175  		}
   176  	}
   177  
   178  	// Find all edges which were deleted.
   179  	for key, e := range edgeSet {
   180  		e.added = false
   181  		e.deleted = true
   182  		quiescent = false
   183  		nodeID, err := strconv.Atoi(strings.Split(key, ":")[0])
   184  		if err != nil {
   185  			log.Fatalf(context.TODO(), "%v", err)
   186  		}
   187  		outgoingMap.addEdge(roachpb.NodeID(nodeID), e)
   188  		delete(edgeSet, key)
   189  	}
   190  
   191  	fmt.Fprintln(f, "digraph G {")
   192  	fmt.Fprintln(f, "node [shape=record];")
   193  	for _, simNode := range network.Nodes {
   194  		node := simNode.Gossip
   195  		var missing []roachpb.NodeID
   196  		var totalAge int64
   197  		for _, otherNode := range network.Nodes {
   198  			if otherNode == simNode {
   199  				continue // skip the node's own info
   200  			}
   201  			infoKey := otherNode.Addr().String()
   202  			// GetInfo returns an error if the info is missing.
   203  			if info, err := node.GetInfo(infoKey); err != nil {
   204  				missing = append(missing, otherNode.Gossip.NodeID.Get())
   205  				quiescent = false
   206  			} else {
   207  				_, val, err := encoding.DecodeUint64Ascending(info)
   208  				if err != nil {
   209  					log.Fatalf(context.TODO(), "bad decode of node info cycle: %s", err)
   210  				}
   211  				totalAge += int64(cycle) - int64(val)
   212  			}
   213  		}
   214  		log.Infof(context.TODO(), "node %d: missing infos for nodes %s", node.NodeID.Get(), missing)
   215  
   216  		var sentinelAge int64
   217  		// GetInfo returns an error if the info is missing.
   218  		if info, err := node.GetInfo(gossip.KeySentinel); err != nil {
   219  			log.Infof(context.TODO(), "error getting info for sentinel gossip key %q: %s", gossip.KeySentinel, err)
   220  		} else {
   221  			_, val, err := encoding.DecodeUint64Ascending(info)
   222  			if err != nil {
   223  				log.Fatalf(context.TODO(), "bad decode of sentinel cycle: %s", err)
   224  			}
   225  			sentinelAge = int64(cycle) - int64(val)
   226  		}
   227  
   228  		var age, nodeColor string
   229  		if len(missing) > 0 {
   230  			nodeColor = "color=red,"
   231  			age = fmt.Sprintf("missing %d", len(missing))
   232  		} else {
   233  			age = strconv.FormatFloat(float64(totalAge)/float64(len(network.Nodes)-1-len(missing)), 'f', 4, 64)
   234  		}
   235  		fontSize := minDotFontSize
   236  		if maxIncoming > 0 {
   237  			fontSize = minDotFontSize + int(math.Floor(float64(len(node.Incoming())*
   238  				(maxDotFontSize-minDotFontSize))/float64(maxIncoming)))
   239  		}
   240  		fmt.Fprintf(f, "\t%s [%sfontsize=%d,label=\"{%s|AA=%s, MH=%d, SA=%d}\"]\n",
   241  			node.NodeID.Get(), nodeColor, fontSize, node.NodeID.Get(), age, node.MaxHops(), sentinelAge)
   242  		outgoing := outgoingMap[node.NodeID.Get()]
   243  		for _, e := range outgoing {
   244  			destSimNode, ok := network.GetNodeFromID(e.dest)
   245  			if !ok {
   246  				continue
   247  			}
   248  			dest := destSimNode.Gossip
   249  			style := ""
   250  			if e.added {
   251  				style = " [color=green]"
   252  			} else if e.deleted {
   253  				style = " [color=red,style=dotted]"
   254  			}
   255  			fmt.Fprintf(f, "\t%s -> %s%s\n", node.NodeID.Get(), dest.NodeID.Get(), style)
   256  			if !e.deleted {
   257  				edgeSet[fmt.Sprintf("%d:%d", node.NodeID.Get(), e.dest)] = e
   258  			}
   259  		}
   260  	}
   261  	fmt.Fprintln(f, "}")
   262  	return f.Name(), quiescent
   263  }
   264  
   265  func main() {
   266  	// Seed the random number generator for non-determinism across
   267  	// multiple runs.
   268  	randutil.SeedForTests()
   269  
   270  	if f := flag.Lookup("logtostderr"); f != nil {
   271  		fmt.Println("Starting simulation. Add -logtostderr to see progress.")
   272  	}
   273  	flag.Parse()
   274  
   275  	dirName, err := ioutil.TempDir("", "gossip-simulation-")
   276  	if err != nil {
   277  		log.Fatalf(context.TODO(), "could not create temporary directory for gossip simulation output: %s", err)
   278  	}
   279  
   280  	// Simulation callbacks to run the simulation for cycleCount
   281  	// cycles. At each cycle % outputEvery, a dot file showing the
   282  	// state of the network graph is output.
   283  	nodeCount := 3
   284  	switch *size {
   285  	case "tiny":
   286  		// Use default parameters.
   287  	case "small":
   288  		nodeCount = 10
   289  	case "medium":
   290  		nodeCount = 25
   291  	case "large":
   292  		nodeCount = 50
   293  	case "huge":
   294  		nodeCount = 100
   295  	case "ginormous":
   296  		nodeCount = 250
   297  	default:
   298  		log.Fatalf(context.TODO(), "unknown simulation size: %s", *size)
   299  	}
   300  
   301  	edgeSet := make(map[string]edge)
   302  
   303  	stopper := stop.NewStopper()
   304  	defer stopper.Stop(context.TODO())
   305  
   306  	n := simulation.NewNetwork(stopper, nodeCount, true, zonepb.DefaultZoneConfigRef())
   307  	n.SimulateNetwork(
   308  		func(cycle int, network *simulation.Network) bool {
   309  			// Output dot graph.
   310  			dotFN := fmt.Sprintf("%s/sim-cycle-%03d.dot", dirName, cycle)
   311  			_, quiescent := outputDotFile(dotFN, cycle, network, edgeSet)
   312  			// Run until network has quiesced.
   313  			return !quiescent
   314  		},
   315  	)
   316  
   317  	// Output instructions for viewing graphs.
   318  	fmt.Printf("To view simulation graph output run (you must install graphviz):\n\nfor f in %s/*.dot ; do circo $f -Tpng -o $f.png ; echo $f.png ; done\n", dirName)
   319  }