github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/gossip/simulation/network.go (about) 1 // Copyright 2014 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package simulation 12 13 import ( 14 "context" 15 "crypto/tls" 16 "net" 17 "time" 18 19 "github.com/cockroachdb/cockroach/pkg/base" 20 "github.com/cockroachdb/cockroach/pkg/config/zonepb" 21 "github.com/cockroachdb/cockroach/pkg/gossip" 22 "github.com/cockroachdb/cockroach/pkg/gossip/resolver" 23 "github.com/cockroachdb/cockroach/pkg/roachpb" 24 "github.com/cockroachdb/cockroach/pkg/rpc" 25 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 26 "github.com/cockroachdb/cockroach/pkg/util" 27 "github.com/cockroachdb/cockroach/pkg/util/encoding" 28 "github.com/cockroachdb/cockroach/pkg/util/hlc" 29 "github.com/cockroachdb/cockroach/pkg/util/log" 30 "github.com/cockroachdb/cockroach/pkg/util/metric" 31 "github.com/cockroachdb/cockroach/pkg/util/netutil" 32 "github.com/cockroachdb/cockroach/pkg/util/stop" 33 "github.com/cockroachdb/cockroach/pkg/util/tracing" 34 "github.com/cockroachdb/cockroach/pkg/util/uuid" 35 "google.golang.org/grpc" 36 ) 37 38 // Node represents a node used in a Network. It includes information 39 // about the node's gossip instance, network address, and underlying 40 // server. 41 type Node struct { 42 Gossip *gossip.Gossip 43 Server *grpc.Server 44 Listener net.Listener 45 Registry *metric.Registry 46 Resolvers []resolver.Resolver 47 } 48 49 // Addr returns the address of the connected listener. 50 func (n *Node) Addr() net.Addr { 51 return n.Listener.Addr() 52 } 53 54 // Network provides access to a test gossip network of nodes. 55 type Network struct { 56 Nodes []*Node 57 Stopper *stop.Stopper 58 RPCContext *rpc.Context 59 nodeIDAllocator roachpb.NodeID // provides unique node IDs 60 tlsConfig *tls.Config 61 started bool 62 } 63 64 // NewNetwork creates nodeCount gossip nodes. 65 func NewNetwork( 66 stopper *stop.Stopper, nodeCount int, createResolvers bool, defaultZoneConfig *zonepb.ZoneConfig, 67 ) *Network { 68 log.Infof(context.TODO(), "simulating gossip network with %d nodes", nodeCount) 69 70 n := &Network{ 71 Nodes: []*Node{}, 72 Stopper: stopper, 73 } 74 n.RPCContext = rpc.NewContext( 75 log.AmbientContext{Tracer: tracing.NewTracer()}, 76 &base.Config{Insecure: true}, 77 hlc.NewClock(hlc.UnixNano, time.Nanosecond), 78 n.Stopper, 79 cluster.MakeTestingClusterSettings(), 80 ) 81 var err error 82 n.tlsConfig, err = n.RPCContext.GetServerTLSConfig() 83 if err != nil { 84 log.Fatalf(context.TODO(), "%v", err) 85 } 86 87 // Ensure that tests using this test context and restart/shut down 88 // their servers do not inadvertently start talking to servers from 89 // unrelated concurrent tests. 90 n.RPCContext.ClusterID.Set(context.TODO(), uuid.MakeV4()) 91 92 for i := 0; i < nodeCount; i++ { 93 node, err := n.CreateNode(defaultZoneConfig) 94 if err != nil { 95 log.Fatalf(context.TODO(), "%v", err) 96 } 97 // Build a resolver for each instance or we'll get data races. 98 if createResolvers { 99 r, err := resolver.NewResolverFromAddress(n.Nodes[0].Addr()) 100 if err != nil { 101 log.Fatalf(context.TODO(), "bad gossip address %s: %s", n.Nodes[0].Addr(), err) 102 } 103 node.Resolvers = []resolver.Resolver{r} 104 } 105 } 106 return n 107 } 108 109 // CreateNode creates a simulation node and starts an RPC server for it. 110 func (n *Network) CreateNode(defaultZoneConfig *zonepb.ZoneConfig) (*Node, error) { 111 server := rpc.NewServer(n.RPCContext) 112 ln, err := net.Listen(util.IsolatedTestAddr.Network(), util.IsolatedTestAddr.String()) 113 if err != nil { 114 return nil, err 115 } 116 node := &Node{Server: server, Listener: ln, Registry: metric.NewRegistry()} 117 node.Gossip = gossip.NewTest(0, n.RPCContext, server, n.Stopper, node.Registry, defaultZoneConfig) 118 n.Stopper.RunWorker(context.TODO(), func(context.Context) { 119 <-n.Stopper.ShouldQuiesce() 120 netutil.FatalIfUnexpected(ln.Close()) 121 <-n.Stopper.ShouldStop() 122 server.Stop() 123 node.Gossip.EnableSimulationCycler(false) 124 }) 125 n.Nodes = append(n.Nodes, node) 126 return node, nil 127 } 128 129 // StartNode initializes a gossip instance for the simulation node and 130 // starts it. 131 func (n *Network) StartNode(node *Node) error { 132 node.Gossip.Start(node.Addr(), node.Resolvers) 133 node.Gossip.EnableSimulationCycler(true) 134 n.nodeIDAllocator++ 135 node.Gossip.NodeID.Set(context.TODO(), n.nodeIDAllocator) 136 if err := node.Gossip.SetNodeDescriptor(&roachpb.NodeDescriptor{ 137 NodeID: node.Gossip.NodeID.Get(), 138 Address: util.MakeUnresolvedAddr(node.Addr().Network(), node.Addr().String()), 139 }); err != nil { 140 return err 141 } 142 if err := node.Gossip.AddInfo(node.Addr().String(), 143 encoding.EncodeUint64Ascending(nil, 0), time.Hour); err != nil { 144 return err 145 } 146 n.Stopper.RunWorker(context.TODO(), func(context.Context) { 147 netutil.FatalIfUnexpected(node.Server.Serve(node.Listener)) 148 }) 149 return nil 150 } 151 152 // GetNodeFromID returns the simulation node associated with 153 // provided node ID, or nil if there is no such node. 154 func (n *Network) GetNodeFromID(nodeID roachpb.NodeID) (*Node, bool) { 155 for _, node := range n.Nodes { 156 if node.Gossip.NodeID.Get() == nodeID { 157 return node, true 158 } 159 } 160 return nil, false 161 } 162 163 // SimulateNetwork runs until the simCallback returns false. 164 // 165 // At each cycle, every node gossips a key equal to its address (unique) 166 // with the cycle as the value. The received cycle value can be used 167 // to determine the aging of information between any two nodes in the 168 // network. 169 // 170 // At each cycle of the simulation, node 0 gossips the sentinel. 171 // 172 // The simulation callback receives the cycle and the network as arguments. 173 func (n *Network) SimulateNetwork(simCallback func(cycle int, network *Network) bool) { 174 n.Start() 175 nodes := n.Nodes 176 for cycle := 1; ; cycle++ { 177 // Node 0 gossips sentinel & cluster ID every cycle. 178 if err := nodes[0].Gossip.AddInfo( 179 gossip.KeySentinel, 180 encoding.EncodeUint64Ascending(nil, uint64(cycle)), 181 time.Hour, 182 ); err != nil { 183 log.Fatalf(context.TODO(), "%v", err) 184 } 185 if err := nodes[0].Gossip.AddInfo( 186 gossip.KeyClusterID, 187 encoding.EncodeUint64Ascending(nil, uint64(cycle)), 188 0*time.Second, 189 ); err != nil { 190 log.Fatalf(context.TODO(), "%v", err) 191 } 192 // Every node gossips every cycle. 193 for _, node := range nodes { 194 if err := node.Gossip.AddInfo( 195 node.Addr().String(), 196 encoding.EncodeUint64Ascending(nil, uint64(cycle)), 197 time.Hour, 198 ); err != nil { 199 log.Fatalf(context.TODO(), "%v", err) 200 } 201 node.Gossip.SimulationCycle() 202 } 203 // If the simCallback returns false, we're done with the 204 // simulation; exit the loop. This condition is tested here 205 // instead of in the for statement in order to guarantee 206 // we run at least one iteration of this loop in order to 207 // gossip the cluster ID and sentinel. 208 if !simCallback(cycle, n) { 209 break 210 } 211 time.Sleep(5 * time.Millisecond) 212 } 213 log.Infof(context.TODO(), "gossip network simulation: total infos sent=%d, received=%d", n.infosSent(), n.infosReceived()) 214 } 215 216 // Start starts all gossip nodes. 217 // TODO(spencer): make all methods in Network return errors instead of 218 // fatal logging. 219 func (n *Network) Start() { 220 if n.started { 221 return 222 } 223 n.started = true 224 for _, node := range n.Nodes { 225 if err := n.StartNode(node); err != nil { 226 log.Fatalf(context.TODO(), "%v", err) 227 } 228 } 229 } 230 231 // RunUntilFullyConnected blocks until the gossip network has received 232 // gossip from every other node in the network. It returns the gossip 233 // cycle at which the network became fully connected. 234 func (n *Network) RunUntilFullyConnected() int { 235 var connectedAtCycle int 236 n.SimulateNetwork(func(cycle int, network *Network) bool { 237 if network.IsNetworkConnected() { 238 connectedAtCycle = cycle 239 return false 240 } 241 return true 242 }) 243 return connectedAtCycle 244 } 245 246 // IsNetworkConnected returns true if the network is fully connected 247 // with no partitions (i.e. every node knows every other node's 248 // network address). 249 func (n *Network) IsNetworkConnected() bool { 250 for _, leftNode := range n.Nodes { 251 for _, rightNode := range n.Nodes { 252 if _, err := leftNode.Gossip.GetInfo(gossip.MakeNodeIDKey(rightNode.Gossip.NodeID.Get())); err != nil { 253 return false 254 } 255 } 256 } 257 return true 258 } 259 260 // infosSent returns the total count of infos sent from all nodes in 261 // the network. 262 func (n *Network) infosSent() int { 263 var count int64 264 for _, node := range n.Nodes { 265 count += node.Gossip.GetNodeMetrics().InfosSent.Counter.Count() 266 } 267 return int(count) 268 } 269 270 // infosReceived returns the total count of infos received from all 271 // nodes in the network. 272 func (n *Network) infosReceived() int { 273 var count int64 274 for _, node := range n.Nodes { 275 count += node.Gossip.GetNodeMetrics().InfosReceived.Counter.Count() 276 } 277 return int(count) 278 }