github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/gossipsim/main.go (about) 1 // Copyright 2014 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 /* 12 Package simulation provides tools meant to visualize or test aspects 13 of a Cockroach cluster on a single host. 14 15 Gossip 16 17 Gossip creates a gossip network of up to 250 nodes and outputs 18 successive visualization of the gossip network graph via dot. 19 20 Uses tcp sockets for connecting 3, 10, 25, 50, 100 or 250 21 nodes. Generates .dot graph output files for each cycle of the 22 simulation. 23 24 To run: 25 26 go install github.com/cockroachdb/cockroach/cmd/gossipsim 27 gossipsim -size=(small|medium|large|huge|ginormous) 28 29 Log output includes instructions for displaying the graph output as a 30 series of images to visualize the evolution of the network. 31 32 Running the large through ginormous simulations will require the open 33 files limit be increased either for the shell running the simulation, 34 or system wide. For Linux: 35 36 # For the current shell: 37 ulimit -n 65536 38 39 # System-wide: 40 sysctl fs.file-max 41 fs.file-max = 50384 42 43 For MacOS: 44 45 # To view current limits (soft / hard): 46 launchctl limit maxfiles 47 48 # To edit, add/edit the following line in /etc/launchd.conf and 49 # restart for the new file limit to take effect. 50 # 51 # limit maxfiles 16384 32768 52 sudo vi /etc/launchd.conf 53 */ 54 package main 55 56 import ( 57 "context" 58 "flag" 59 "fmt" 60 "io/ioutil" 61 "math" 62 "os" 63 "strconv" 64 "strings" 65 66 "github.com/cockroachdb/cockroach/pkg/config/zonepb" 67 "github.com/cockroachdb/cockroach/pkg/gossip" 68 "github.com/cockroachdb/cockroach/pkg/gossip/simulation" 69 "github.com/cockroachdb/cockroach/pkg/roachpb" 70 "github.com/cockroachdb/cockroach/pkg/util/encoding" 71 "github.com/cockroachdb/cockroach/pkg/util/log" 72 "github.com/cockroachdb/cockroach/pkg/util/randutil" 73 "github.com/cockroachdb/cockroach/pkg/util/stop" 74 ) 75 76 const ( 77 // minDotFontSize is the minimum font size for scaling node sizes 78 // proportional to the number of incoming connections. 79 minDotFontSize = 12 80 // maxDotFontSize is the maximum font size for scaling node sizes. 81 maxDotFontSize = 24 82 ) 83 84 var ( 85 size = flag.String("size", "medium", "size of network (tiny|small|medium|large|huge|ginormous)") 86 ) 87 88 // edge is a helper struct which describes an edge in the dot output graph. 89 type edge struct { 90 dest roachpb.NodeID // Node ID of destination 91 added bool // True if edge was recently added 92 deleted bool // True if edge was recently deleted 93 } 94 95 // edgeMap is a map from node address to a list of edges. A helper 96 // method is provided to simplify adding edges. 97 type edgeMap map[roachpb.NodeID][]edge 98 99 // addEdge creates a list of edges if one doesn't yet exist for the 100 // specified node ID. 101 func (em edgeMap) addEdge(nodeID roachpb.NodeID, e edge) { 102 if _, ok := em[nodeID]; !ok { 103 em[nodeID] = make([]edge, 0, 1) 104 } 105 em[nodeID] = append(em[nodeID], e) 106 } 107 108 // outputDotFile generates a .dot file describing the current state of 109 // the gossip network. nodes is a map from network address to gossip 110 // node. edgeSet is empty on the first invocation, but 111 // its content is set to encompass the entire set of edges in the 112 // network when this method returns. It should be resupplied with each 113 // successive invocation, as it is used to determine which edges are 114 // new and which have been deleted and show those changes visually in 115 // the output graph. New edges are drawn green; edges which were 116 // removed over the course of the last simulation step(s) are drawn in 117 // a lightly-dashed red. 118 // 119 // The format of the output looks like this: 120 // 121 // digraph G { 122 // node [shape=record]; 123 // node1 [fontsize=12,label="{Node 1|MH=3}"] 124 // node1 -> node3 [color=green] 125 // node1 -> node4 126 // node1 -> node5 [color=red,style=dotted] 127 // node2 [fontsize=24,label="{Node 2|MH=2}"] 128 // node2 -> node5 129 // node3 [fontsize=18,label="{Node 3|MH=5}"] 130 // node3 -> node5 131 // node3 -> node4 132 // node4 [fontsize=24,label="{Node 4|MH=4}"] 133 // node4 -> node2 134 // node5 [fontsize=24,label="{Node 5|MH=1}"] 135 // node5 -> node2 136 // node5 -> node3 137 // } 138 // 139 // Returns the name of the output file and a boolean for whether or not 140 // the network has quiesced (that is, no new edges, and all nodes are 141 // connected). 142 func outputDotFile( 143 dotFN string, cycle int, network *simulation.Network, edgeSet map[string]edge, 144 ) (string, bool) { 145 f, err := os.Create(dotFN) 146 if err != nil { 147 log.Fatalf(context.TODO(), "unable to create temp file: %s", err) 148 } 149 defer f.Close() 150 151 // Determine maximum number of incoming connections. Create outgoing 152 // edges, keeping track of which are new since last time (added=true). 153 outgoingMap := make(edgeMap) 154 var maxIncoming int 155 quiescent := true 156 // The order the graph file is written influences the arrangement 157 // of nodes in the output image, so it makes sense to eliminate 158 // randomness here. Unfortunately with graphviz it's fairly hard 159 // to get a consistent ordering. 160 for _, simNode := range network.Nodes { 161 node := simNode.Gossip 162 incoming := node.Incoming() 163 for _, iNode := range incoming { 164 e := edge{dest: node.NodeID.Get()} 165 key := fmt.Sprintf("%d:%d", iNode, node.NodeID.Get()) 166 if _, ok := edgeSet[key]; !ok { 167 e.added = true 168 quiescent = false 169 } 170 delete(edgeSet, key) 171 outgoingMap.addEdge(iNode, e) 172 } 173 if len(incoming) > maxIncoming { 174 maxIncoming = len(incoming) 175 } 176 } 177 178 // Find all edges which were deleted. 179 for key, e := range edgeSet { 180 e.added = false 181 e.deleted = true 182 quiescent = false 183 nodeID, err := strconv.Atoi(strings.Split(key, ":")[0]) 184 if err != nil { 185 log.Fatalf(context.TODO(), "%v", err) 186 } 187 outgoingMap.addEdge(roachpb.NodeID(nodeID), e) 188 delete(edgeSet, key) 189 } 190 191 fmt.Fprintln(f, "digraph G {") 192 fmt.Fprintln(f, "node [shape=record];") 193 for _, simNode := range network.Nodes { 194 node := simNode.Gossip 195 var missing []roachpb.NodeID 196 var totalAge int64 197 for _, otherNode := range network.Nodes { 198 if otherNode == simNode { 199 continue // skip the node's own info 200 } 201 infoKey := otherNode.Addr().String() 202 // GetInfo returns an error if the info is missing. 203 if info, err := node.GetInfo(infoKey); err != nil { 204 missing = append(missing, otherNode.Gossip.NodeID.Get()) 205 quiescent = false 206 } else { 207 _, val, err := encoding.DecodeUint64Ascending(info) 208 if err != nil { 209 log.Fatalf(context.TODO(), "bad decode of node info cycle: %s", err) 210 } 211 totalAge += int64(cycle) - int64(val) 212 } 213 } 214 log.Infof(context.TODO(), "node %d: missing infos for nodes %s", node.NodeID.Get(), missing) 215 216 var sentinelAge int64 217 // GetInfo returns an error if the info is missing. 218 if info, err := node.GetInfo(gossip.KeySentinel); err != nil { 219 log.Infof(context.TODO(), "error getting info for sentinel gossip key %q: %s", gossip.KeySentinel, err) 220 } else { 221 _, val, err := encoding.DecodeUint64Ascending(info) 222 if err != nil { 223 log.Fatalf(context.TODO(), "bad decode of sentinel cycle: %s", err) 224 } 225 sentinelAge = int64(cycle) - int64(val) 226 } 227 228 var age, nodeColor string 229 if len(missing) > 0 { 230 nodeColor = "color=red," 231 age = fmt.Sprintf("missing %d", len(missing)) 232 } else { 233 age = strconv.FormatFloat(float64(totalAge)/float64(len(network.Nodes)-1-len(missing)), 'f', 4, 64) 234 } 235 fontSize := minDotFontSize 236 if maxIncoming > 0 { 237 fontSize = minDotFontSize + int(math.Floor(float64(len(node.Incoming())* 238 (maxDotFontSize-minDotFontSize))/float64(maxIncoming))) 239 } 240 fmt.Fprintf(f, "\t%s [%sfontsize=%d,label=\"{%s|AA=%s, MH=%d, SA=%d}\"]\n", 241 node.NodeID.Get(), nodeColor, fontSize, node.NodeID.Get(), age, node.MaxHops(), sentinelAge) 242 outgoing := outgoingMap[node.NodeID.Get()] 243 for _, e := range outgoing { 244 destSimNode, ok := network.GetNodeFromID(e.dest) 245 if !ok { 246 continue 247 } 248 dest := destSimNode.Gossip 249 style := "" 250 if e.added { 251 style = " [color=green]" 252 } else if e.deleted { 253 style = " [color=red,style=dotted]" 254 } 255 fmt.Fprintf(f, "\t%s -> %s%s\n", node.NodeID.Get(), dest.NodeID.Get(), style) 256 if !e.deleted { 257 edgeSet[fmt.Sprintf("%d:%d", node.NodeID.Get(), e.dest)] = e 258 } 259 } 260 } 261 fmt.Fprintln(f, "}") 262 return f.Name(), quiescent 263 } 264 265 func main() { 266 // Seed the random number generator for non-determinism across 267 // multiple runs. 268 randutil.SeedForTests() 269 270 if f := flag.Lookup("logtostderr"); f != nil { 271 fmt.Println("Starting simulation. Add -logtostderr to see progress.") 272 } 273 flag.Parse() 274 275 dirName, err := ioutil.TempDir("", "gossip-simulation-") 276 if err != nil { 277 log.Fatalf(context.TODO(), "could not create temporary directory for gossip simulation output: %s", err) 278 } 279 280 // Simulation callbacks to run the simulation for cycleCount 281 // cycles. At each cycle % outputEvery, a dot file showing the 282 // state of the network graph is output. 283 nodeCount := 3 284 switch *size { 285 case "tiny": 286 // Use default parameters. 287 case "small": 288 nodeCount = 10 289 case "medium": 290 nodeCount = 25 291 case "large": 292 nodeCount = 50 293 case "huge": 294 nodeCount = 100 295 case "ginormous": 296 nodeCount = 250 297 default: 298 log.Fatalf(context.TODO(), "unknown simulation size: %s", *size) 299 } 300 301 edgeSet := make(map[string]edge) 302 303 stopper := stop.NewStopper() 304 defer stopper.Stop(context.TODO()) 305 306 n := simulation.NewNetwork(stopper, nodeCount, true, zonepb.DefaultZoneConfigRef()) 307 n.SimulateNetwork( 308 func(cycle int, network *simulation.Network) bool { 309 // Output dot graph. 310 dotFN := fmt.Sprintf("%s/sim-cycle-%03d.dot", dirName, cycle) 311 _, quiescent := outputDotFile(dotFN, cycle, network, edgeSet) 312 // Run until network has quiesced. 313 return !quiescent 314 }, 315 ) 316 317 // Output instructions for viewing graphs. 318 fmt.Printf("To view simulation graph output run (you must install graphviz):\n\nfor f in %s/*.dot ; do circo $f -Tpng -o $f.png ; echo $f.png ; done\n", dirName) 319 }