github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/gossip/server.go

// Copyright 2014 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package gossip

import (
	"context"
	"math/rand"
	"net"
	"sync"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/util"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/metric"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
)

type serverInfo struct {
	createdAt time.Time
	peerID    roachpb.NodeID
}

// server maintains an array of connected peers to which it gossips
// newly arrived information on a periodic basis.
type server struct {
	log.AmbientContext

	clusterID *base.ClusterIDContainer
	NodeID    *base.NodeIDContainer

	stopper *stop.Stopper

	mu struct {
		syncutil.RWMutex
		is       *infoStore                         // The backing infostore
		incoming nodeSet                            // Incoming client node IDs
		nodeMap  map[util.UnresolvedAddr]serverInfo // Incoming client's local address -> serverInfo
		// ready broadcasts a wakeup to waiting gossip requests. This is done
		// via closing the current ready channel and opening a new one. This
		// is required due to the fact that condition variables are not
		// composable. There's an open proposal to add them:
		// https://github.com/golang/go/issues/16620
		ready chan struct{}
	}
	tighten chan struct{} // Sent on when we may want to tighten the network

	nodeMetrics   Metrics
	serverMetrics Metrics

	simulationCycler *sync.Cond // Used when simulating the network to signal next cycle
}

// newServer creates and returns a server struct.
func newServer(
	ambient log.AmbientContext,
	clusterID *base.ClusterIDContainer,
	nodeID *base.NodeIDContainer,
	stopper *stop.Stopper,
	registry *metric.Registry,
) *server {
	s := &server{
		AmbientContext: ambient,
		clusterID:      clusterID,
		NodeID:         nodeID,
		stopper:        stopper,
		tighten:        make(chan struct{}, 1),
		nodeMetrics:    makeMetrics(),
		serverMetrics:  makeMetrics(),
	}

	s.mu.is = newInfoStore(s.AmbientContext, nodeID, util.UnresolvedAddr{}, stopper)
	s.mu.incoming = makeNodeSet(minPeers, metric.NewGauge(MetaConnectionsIncomingGauge))
	s.mu.nodeMap = make(map[util.UnresolvedAddr]serverInfo)
	s.mu.ready = make(chan struct{})

	registry.AddMetric(s.mu.incoming.gauge)
	registry.AddMetricStruct(s.nodeMetrics)

	return s
}

// GetNodeMetrics returns this server's node metrics struct.
func (s *server) GetNodeMetrics() *Metrics {
	return &s.nodeMetrics
}

// Gossip receives gossiped information from a peer node.
// The received delta is combined with the infostore, and this
// node's own gossip is returned to the requesting client.
func (s *server) Gossip(stream Gossip_GossipServer) error {
	args, err := stream.Recv()
	if err != nil {
		return err
	}
	if (args.ClusterID != uuid.UUID{}) && args.ClusterID != s.clusterID.Get() {
		return errors.Errorf("gossip connection refused from different cluster %s", args.ClusterID)
	}

	ctx, cancel := context.WithCancel(s.AnnotateCtx(stream.Context()))
	defer cancel()
	syncChan := make(chan struct{}, 1)
	send := func(reply *Response) error {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case syncChan <- struct{}{}:
			defer func() { <-syncChan }()

			bytesSent := int64(reply.Size())
			infoCount := int64(len(reply.Delta))
			s.nodeMetrics.BytesSent.Inc(bytesSent)
			s.nodeMetrics.InfosSent.Inc(infoCount)
			s.serverMetrics.BytesSent.Inc(bytesSent)
			s.serverMetrics.InfosSent.Inc(infoCount)

			return stream.Send(reply)
		}
	}

	defer func() { syncChan <- struct{}{} }()

	errCh := make(chan error, 1)

	// Starting workers in a task prevents data races during shutdown.
	if err := s.stopper.RunTask(ctx, "gossip.server: receiver", func(ctx context.Context) {
		s.stopper.RunWorker(ctx, func(ctx context.Context) {
			errCh <- s.gossipReceiver(ctx, &args, send, stream.Recv)
		})
	}); err != nil {
		return err
	}

	reply := new(Response)

	for init := true; ; init = false {
		s.mu.Lock()
		// Store the old ready so that if it gets replaced with a new one
		// (once the lock is released) and is closed, we still trigger the
		// select below.
		ready := s.mu.ready
		delta := s.mu.is.delta(args.HighWaterStamps)
		if init {
			s.mu.is.populateMostDistantMarkers(delta)
		}
		if args.HighWaterStamps == nil {
			args.HighWaterStamps = make(map[roachpb.NodeID]int64)
		}

		// Send a response if this is the first response on the connection, or if
		// there are deltas to send. The first condition is necessary to make sure
		// the remote node receives our high water stamps in a timely fashion.
		if infoCount := len(delta); init || infoCount > 0 {
			if log.V(1) {
				log.Infof(ctx, "returning %d info(s) to n%d: %s",
					infoCount, args.NodeID, extractKeys(delta))
			}
			// Ensure that the high water stamps for the remote client are kept up to
			// date so that we avoid resending the same gossip infos as infos are
			// updated locally.
			for _, i := range delta {
				ratchetHighWaterStamp(args.HighWaterStamps, i.NodeID, i.OrigStamp)
			}

			*reply = Response{
				NodeID:          s.NodeID.Get(),
				HighWaterStamps: s.mu.is.getHighWaterStamps(),
				Delta:           delta,
			}

			s.mu.Unlock()
			if err := send(reply); err != nil {
				return err
			}
			s.mu.Lock()
		}

		s.mu.Unlock()

		select {
		case <-s.stopper.ShouldQuiesce():
			return nil
		case err := <-errCh:
			return err
		case <-ready:
		}
	}
}

func (s *server) gossipReceiver(
	ctx context.Context,
	argsPtr **Request,
	senderFn func(*Response) error,
	receiverFn func() (*Request, error),
) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	reply := new(Response)

	// Track whether we've decided whether or not to admit the gossip connection
	// from this node. We only want to do this once so that we can do a duplicate
	// connection check based on node ID here.
	nodeIdentified := false

	// This loop receives gossip from the client. It does not attempt to send the
	// server's gossip to the client.
	for {
		args := *argsPtr
		if args.NodeID == 0 {
			// Let the connection through so that the client can get a node ID. Once it
			// has one, we'll run the logic below to decide whether to keep the
			// connection to it or to forward it elsewhere.
			log.Infof(ctx, "received initial cluster-verification connection from %s", args.Addr)
		} else if !nodeIdentified {
			nodeIdentified = true

			// Decide whether or not we can accept the incoming connection
			// as a permanent peer.
			if args.NodeID == s.NodeID.Get() {
				// This is an incoming loopback connection which should be closed by
				// the client.
				if log.V(2) {
					log.Infof(ctx, "ignoring gossip from n%d (loopback)", args.NodeID)
				}
			} else if _, ok := s.mu.nodeMap[args.Addr]; ok {
				// This is a duplicate incoming connection from the same node as an existing
				// connection. This can happen when bootstrap connections are initiated
				// through a load balancer.
				if log.V(2) {
					log.Infof(ctx, "duplicate connection received from n%d at %s", args.NodeID, args.Addr)
				}
				return errors.Errorf("duplicate connection from node at %s", args.Addr)
			} else if s.mu.incoming.hasSpace() {
				log.VEventf(ctx, 2, "adding n%d to incoming set", args.NodeID)

				s.mu.incoming.addNode(args.NodeID)
				s.mu.nodeMap[args.Addr] = serverInfo{
					peerID:    args.NodeID,
					createdAt: timeutil.Now(),
				}

				defer func(nodeID roachpb.NodeID, addr util.UnresolvedAddr) {
					log.VEventf(ctx, 2, "removing n%d from incoming set", args.NodeID)
					s.mu.incoming.removeNode(nodeID)
					delete(s.mu.nodeMap, addr)
				}(args.NodeID, args.Addr)
			} else {
				// If we don't have any space left, forward the client along to a peer.
				var alternateAddr util.UnresolvedAddr
				var alternateNodeID roachpb.NodeID
				// Choose a random peer for forwarding.
				altIdx := rand.Intn(len(s.mu.nodeMap))
				for addr, info := range s.mu.nodeMap {
					if altIdx == 0 {
						alternateAddr = addr
						alternateNodeID = info.peerID
						break
					}
					altIdx--
				}

				s.nodeMetrics.ConnectionsRefused.Inc(1)
				log.Infof(ctx, "refusing gossip from n%d (max %d conns); forwarding to n%d (%s)",
					args.NodeID, s.mu.incoming.maxSize, alternateNodeID, alternateAddr)

				*reply = Response{
					NodeID:          s.NodeID.Get(),
					AlternateAddr:   &alternateAddr,
					AlternateNodeID: alternateNodeID,
				}

				s.mu.Unlock()
				err := senderFn(reply)
				s.mu.Lock()
				// Naively, we would return err here unconditionally, but that
				// introduces a race. Specifically, the client may observe the
				// end of the connection before it has a chance to receive and
				// process this message, which instructs it to hang up anyway.
				// Instead, we send the message and proceed to gossip
				// normally, depending on the client to end the connection.
				if err != nil {
					return err
				}
			}
		}

		bytesReceived := int64(args.Size())
		infosReceived := int64(len(args.Delta))
		s.nodeMetrics.BytesReceived.Inc(bytesReceived)
		s.nodeMetrics.InfosReceived.Inc(infosReceived)
		s.serverMetrics.BytesReceived.Inc(bytesReceived)
		s.serverMetrics.InfosReceived.Inc(infosReceived)

		freshCount, err := s.mu.is.combine(args.Delta, args.NodeID)
		if err != nil {
			log.Warningf(ctx, "failed to fully combine gossip delta from n%d: %s", args.NodeID, err)
		}
		if log.V(1) {
			log.Infof(ctx, "received %s from n%d (%d fresh)", extractKeys(args.Delta), args.NodeID, freshCount)
		}
		s.maybeTightenLocked()

		*reply = Response{
			NodeID:          s.NodeID.Get(),
			HighWaterStamps: s.mu.is.getHighWaterStamps(),
		}

		s.mu.Unlock()
		err = senderFn(reply)
		s.mu.Lock()
		if err != nil {
			return err
		}

		if cycler := s.simulationCycler; cycler != nil {
			cycler.Wait()
		}

		s.mu.Unlock()
		recvArgs, err := receiverFn()
		s.mu.Lock()
		if err != nil {
			return err
		}

		// *argsPtr holds the remote peer state; we need to update it whenever we
		// receive a new non-nil request. We avoid assigning to *argsPtr directly
		// because the gossip sender above has closed over *argsPtr and will NPE if
		// *argsPtr were set to nil.
		mergeHighWaterStamps(&recvArgs.HighWaterStamps, (*argsPtr).HighWaterStamps)
		*argsPtr = recvArgs
	}
}

// maybeTightenLocked performs a non-blocking send on the tighten channel to
// notify the gossip loop that the network may need tightening. It is a no-op
// if a signal is already pending.
func (s *server) maybeTightenLocked() {
	select {
	case s.tighten <- struct{}{}:
	default:
	}
}

// start initializes the infostore with the rpc server address and
// then begins processing connecting clients in an infinite select
// loop in a goroutine. Periodically, clients connected and awaiting
// the next round of gossip are awoken by closing the ready channel.
func (s *server) start(addr net.Addr) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.mu.is.NodeAddr = util.MakeUnresolvedAddr(addr.Network(), addr.String())

	broadcast := func() {
		// Close the old ready and open a new one. This will broadcast to all
		// receivers and set up a fresh channel to replace the closed one.
		s.mu.Lock()
		defer s.mu.Unlock()
		ready := make(chan struct{})
		close(s.mu.ready)
		s.mu.ready = ready
	}

	// We require redundant callbacks here as the broadcast callback is
	// propagating gossip infos to other nodes and needs to propagate the new
	// expiration info.
	unregister := s.mu.is.registerCallback(".*", func(_ string, _ roachpb.Value) {
		broadcast()
	}, Redundant)

	s.stopper.RunWorker(context.TODO(), func(context.Context) {
		<-s.stopper.ShouldQuiesce()

		s.mu.Lock()
		unregister()
		s.mu.Unlock()

		broadcast()
	})
}

// status returns a snapshot of the server's incoming connections and
// server-side gossip metrics.
func (s *server) status() ServerStatus {
	s.mu.RLock()
	defer s.mu.RUnlock()

	var status ServerStatus
	status.ConnStatus = make([]ConnStatus, 0, len(s.mu.nodeMap))
	status.MaxConns = int32(s.mu.incoming.maxSize)
	status.MetricSnap = s.serverMetrics.Snapshot()

	for addr, info := range s.mu.nodeMap {
		status.ConnStatus = append(status.ConnStatus, ConnStatus{
			NodeID:   info.peerID,
			Address:  addr.String(),
			AgeNanos: timeutil.Since(info.createdAt).Nanoseconds(),
		})
	}
	return status
}

// roundSecs rounds a duration to the nearest whole second.
func roundSecs(d time.Duration) time.Duration {
	return time.Duration(d.Seconds()+0.5) * time.Second
}

// GetNodeAddr returns the node's address stored in the Infostore.
func (s *server) GetNodeAddr() *util.UnresolvedAddr {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return &s.mu.is.NodeAddr
}
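The ready-channel wakeup used above (close the current channel to broadcast, then install a fresh one) is a common substitute for a broadcast-capable condition variable, since sync.Cond cannot participate in a select. The following is a minimal, self-contained sketch of that pattern only; it is not part of server.go, and the names notifier, newNotifier, wait, and broadcast are hypothetical.

package main

import (
	"fmt"
	"sync"
	"time"
)

// notifier broadcasts wakeups by closing its current channel and replacing it
// with a fresh one, mirroring how server.mu.ready is used in the gossip server.
type notifier struct {
	mu    sync.Mutex
	ready chan struct{}
}

func newNotifier() *notifier {
	return &notifier{ready: make(chan struct{})}
}

// wait returns the current channel; callers select on it alongside other
// channels (for example a stopper's quiesce channel or an error channel).
func (n *notifier) wait() <-chan struct{} {
	n.mu.Lock()
	defer n.mu.Unlock()
	return n.ready
}

// broadcast wakes every waiter by closing the old channel, then opens a new
// one for the next round of waiters.
func (n *notifier) broadcast() {
	n.mu.Lock()
	defer n.mu.Unlock()
	close(n.ready)
	n.ready = make(chan struct{})
}

func main() {
	n := newNotifier()
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			select {
			case <-n.wait():
				fmt.Printf("waiter %d woken\n", id)
			case <-time.After(time.Second):
				fmt.Printf("waiter %d timed out\n", id)
			}
		}(i)
	}
	time.Sleep(10 * time.Millisecond) // give the waiters time to block (demo only)
	n.broadcast()
	wg.Wait()
}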