github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/gossip/client.go (about) 1 // Copyright 2014 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package gossip 12 13 import ( 14 "context" 15 "fmt" 16 "net" 17 "sync" 18 "time" 19 20 circuit "github.com/cockroachdb/circuitbreaker" 21 "github.com/cockroachdb/cockroach/pkg/roachpb" 22 "github.com/cockroachdb/cockroach/pkg/rpc" 23 "github.com/cockroachdb/cockroach/pkg/util" 24 "github.com/cockroachdb/cockroach/pkg/util/grpcutil" 25 "github.com/cockroachdb/cockroach/pkg/util/log" 26 "github.com/cockroachdb/cockroach/pkg/util/stop" 27 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 28 "github.com/cockroachdb/errors" 29 ) 30 31 // client is a client-side RPC connection to a gossip peer node. 32 type client struct { 33 log.AmbientContext 34 35 createdAt time.Time 36 peerID roachpb.NodeID // Peer node ID; 0 until first gossip response 37 resolvedPlaceholder bool // Whether we've resolved the nodeSet's placeholder for this client 38 addr net.Addr // Peer node network address 39 forwardAddr *util.UnresolvedAddr // Set if disconnected with an alternate addr 40 remoteHighWaterStamps map[roachpb.NodeID]int64 // Remote server's high water timestamps 41 closer chan struct{} // Client shutdown channel 42 clientMetrics Metrics 43 nodeMetrics Metrics 44 } 45 46 // extractKeys returns a string representation of a gossip delta's keys. 47 func extractKeys(delta map[string]*Info) string { 48 keys := make([]string, 0, len(delta)) 49 for key := range delta { 50 keys = append(keys, key) 51 } 52 return fmt.Sprintf("%s", keys) 53 } 54 55 // newClient creates and returns a client struct. 56 func newClient(ambient log.AmbientContext, addr net.Addr, nodeMetrics Metrics) *client { 57 return &client{ 58 AmbientContext: ambient, 59 createdAt: timeutil.Now(), 60 addr: addr, 61 remoteHighWaterStamps: map[roachpb.NodeID]int64{}, 62 closer: make(chan struct{}), 63 clientMetrics: makeMetrics(), 64 nodeMetrics: nodeMetrics, 65 } 66 } 67 68 // start dials the remote addr and commences gossip once connected. Upon exit, 69 // the client is sent on the disconnected channel. This method starts client 70 // processing in a goroutine and returns immediately. 71 func (c *client) startLocked( 72 g *Gossip, 73 disconnected chan *client, 74 rpcCtx *rpc.Context, 75 stopper *stop.Stopper, 76 breaker *circuit.Breaker, 77 ) { 78 // Add a placeholder for the new outgoing connection because we may not know 79 // the ID of the node we're connecting to yet. This will be resolved in 80 // (*client).handleResponse once we know the ID. 81 g.outgoing.addPlaceholder() 82 83 ctx, cancel := context.WithCancel(c.AnnotateCtx(context.Background())) 84 stopper.RunWorker(ctx, func(ctx context.Context) { 85 var wg sync.WaitGroup 86 defer func() { 87 // This closes the outgoing stream, causing any attempt to send or 88 // receive to return an error. 89 // 90 // Note: it is still possible for incoming gossip to be processed after 91 // this point. 92 cancel() 93 94 // The stream is closed, but there may still be some incoming gossip 95 // being processed. Wait until that is complete to avoid racing the 96 // client's removal against the discovery of its remote's node ID. 97 wg.Wait() 98 disconnected <- c 99 }() 100 101 consecFailures := breaker.ConsecFailures() 102 var stream Gossip_GossipClient 103 if err := breaker.Call(func() error { 104 // Note: avoid using `grpc.WithBlock` here. This code is already 105 // asynchronous from the caller's perspective, so the only effect of 106 // `WithBlock` here is blocking shutdown - at the time of this writing, 107 // that ends ups up making `kv` tests take twice as long. 108 conn, err := rpcCtx.GRPCUnvalidatedDial(c.addr.String()).Connect(ctx) 109 if err != nil { 110 return err 111 } 112 if stream, err = NewGossipClient(conn).Gossip(ctx); err != nil { 113 return err 114 } 115 return c.requestGossip(g, stream) 116 }, 0); err != nil { 117 if consecFailures == 0 { 118 log.Warningf(ctx, "failed to start gossip client to %s: %s", c.addr, err) 119 } 120 return 121 } 122 123 // Start gossiping. 124 log.Infof(ctx, "started gossip client to %s", c.addr) 125 if err := c.gossip(ctx, g, stream, stopper, &wg); err != nil { 126 if !grpcutil.IsClosedConnection(err) { 127 g.mu.RLock() 128 if c.peerID != 0 { 129 log.Infof(ctx, "closing client to n%d (%s): %s", c.peerID, c.addr, err) 130 } else { 131 log.Infof(ctx, "closing client to %s: %s", c.addr, err) 132 } 133 g.mu.RUnlock() 134 } 135 } 136 }) 137 } 138 139 // close stops the client gossip loop and returns immediately. 140 func (c *client) close() { 141 select { 142 case <-c.closer: 143 default: 144 close(c.closer) 145 } 146 } 147 148 // requestGossip requests the latest gossip from the remote server by 149 // supplying a map of this node's knowledge of other nodes' high water 150 // timestamps. 151 func (c *client) requestGossip(g *Gossip, stream Gossip_GossipClient) error { 152 g.mu.RLock() 153 args := &Request{ 154 NodeID: g.NodeID.Get(), 155 Addr: g.mu.is.NodeAddr, 156 HighWaterStamps: g.mu.is.getHighWaterStamps(), 157 ClusterID: g.clusterID.Get(), 158 } 159 g.mu.RUnlock() 160 161 bytesSent := int64(args.Size()) 162 c.clientMetrics.BytesSent.Inc(bytesSent) 163 c.nodeMetrics.BytesSent.Inc(bytesSent) 164 165 return stream.Send(args) 166 } 167 168 // sendGossip sends the latest gossip to the remote server, based on 169 // the remote server's notion of other nodes' high water timestamps. 170 func (c *client) sendGossip(g *Gossip, stream Gossip_GossipClient, firstReq bool) error { 171 g.mu.Lock() 172 delta := g.mu.is.delta(c.remoteHighWaterStamps) 173 if firstReq { 174 g.mu.is.populateMostDistantMarkers(delta) 175 } 176 if len(delta) > 0 { 177 // Ensure that the high water stamps for the remote server are kept up to 178 // date so that we avoid resending the same gossip infos as infos are 179 // updated locally. 180 for _, i := range delta { 181 ratchetHighWaterStamp(c.remoteHighWaterStamps, i.NodeID, i.OrigStamp) 182 } 183 184 args := Request{ 185 NodeID: g.NodeID.Get(), 186 Addr: g.mu.is.NodeAddr, 187 Delta: delta, 188 HighWaterStamps: g.mu.is.getHighWaterStamps(), 189 ClusterID: g.clusterID.Get(), 190 } 191 192 bytesSent := int64(args.Size()) 193 infosSent := int64(len(delta)) 194 c.clientMetrics.BytesSent.Inc(bytesSent) 195 c.clientMetrics.InfosSent.Inc(infosSent) 196 c.nodeMetrics.BytesSent.Inc(bytesSent) 197 c.nodeMetrics.InfosSent.Inc(infosSent) 198 199 if log.V(1) { 200 ctx := c.AnnotateCtx(stream.Context()) 201 if c.peerID != 0 { 202 log.Infof(ctx, "sending %s to n%d (%s)", extractKeys(args.Delta), c.peerID, c.addr) 203 } else { 204 log.Infof(ctx, "sending %s to %s", extractKeys(args.Delta), c.addr) 205 } 206 } 207 208 g.mu.Unlock() 209 return stream.Send(&args) 210 } 211 g.mu.Unlock() 212 return nil 213 } 214 215 // handleResponse handles errors, remote forwarding, and combines delta 216 // gossip infos from the remote server with this node's infostore. 217 func (c *client) handleResponse(ctx context.Context, g *Gossip, reply *Response) error { 218 g.mu.Lock() 219 defer g.mu.Unlock() 220 221 bytesReceived := int64(reply.Size()) 222 infosReceived := int64(len(reply.Delta)) 223 c.clientMetrics.BytesReceived.Inc(bytesReceived) 224 c.clientMetrics.InfosReceived.Inc(infosReceived) 225 c.nodeMetrics.BytesReceived.Inc(bytesReceived) 226 c.nodeMetrics.InfosReceived.Inc(infosReceived) 227 228 // Combine remote node's infostore delta with ours. 229 if reply.Delta != nil { 230 freshCount, err := g.mu.is.combine(reply.Delta, reply.NodeID) 231 if err != nil { 232 log.Warningf(ctx, "failed to fully combine delta from n%d: %s", reply.NodeID, err) 233 } 234 if infoCount := len(reply.Delta); infoCount > 0 { 235 if log.V(1) { 236 log.Infof(ctx, "received %s from n%d (%d fresh)", extractKeys(reply.Delta), reply.NodeID, freshCount) 237 } 238 } 239 g.maybeTightenLocked() 240 } 241 c.peerID = reply.NodeID 242 mergeHighWaterStamps(&c.remoteHighWaterStamps, reply.HighWaterStamps) 243 244 // If we haven't yet recorded which node ID we're connected to in the outgoing 245 // nodeSet, do so now. Note that we only want to do this if the peer has a 246 // node ID allocated (i.e. if it's nonzero), because otherwise it could change 247 // after we record it. 248 if !c.resolvedPlaceholder && c.peerID != 0 { 249 c.resolvedPlaceholder = true 250 g.outgoing.resolvePlaceholder(c.peerID) 251 } 252 253 // Handle remote forwarding. 254 if reply.AlternateAddr != nil { 255 if g.hasIncomingLocked(reply.AlternateNodeID) || g.hasOutgoingLocked(reply.AlternateNodeID) { 256 return errors.Errorf( 257 "received forward from n%d to n%d (%s); already have active connection, skipping", 258 reply.NodeID, reply.AlternateNodeID, reply.AlternateAddr) 259 } 260 // We try to resolve the address, but don't actually use the result. 261 // The certificates (if any) may only be valid for the unresolved 262 // address. 263 if _, err := reply.AlternateAddr.Resolve(); err != nil { 264 return errors.Errorf("unable to resolve alternate address %s for n%d: %s", 265 reply.AlternateAddr, reply.AlternateNodeID, err) 266 } 267 c.forwardAddr = reply.AlternateAddr 268 return errors.Errorf("received forward from n%d to %d (%s)", 269 reply.NodeID, reply.AlternateNodeID, reply.AlternateAddr) 270 } 271 272 // Check whether we're connected at this point. 273 g.signalConnectedLocked() 274 275 // Check whether this outgoing client is duplicating work already 276 // being done by an incoming client, either because an outgoing 277 // matches an incoming or the client is connecting to itself. 278 if nodeID := g.NodeID.Get(); nodeID == c.peerID { 279 return errors.Errorf("stopping outgoing client to n%d (%s); loopback connection", c.peerID, c.addr) 280 } else if g.hasIncomingLocked(c.peerID) && nodeID > c.peerID { 281 // To avoid mutual shutdown, we only shutdown our client if our 282 // node ID is higher than the peer's. 283 return errors.Errorf("stopping outgoing client to n%d (%s); already have incoming", c.peerID, c.addr) 284 } 285 286 return nil 287 } 288 289 // gossip loops, sending deltas of the infostore and receiving deltas 290 // in turn. If an alternate is proposed on response, the client addr 291 // is modified and method returns for forwarding by caller. 292 func (c *client) gossip( 293 ctx context.Context, 294 g *Gossip, 295 stream Gossip_GossipClient, 296 stopper *stop.Stopper, 297 wg *sync.WaitGroup, 298 ) error { 299 sendGossipChan := make(chan struct{}, 1) 300 301 // Register a callback for gossip updates. 302 updateCallback := func(_ string, _ roachpb.Value) { 303 select { 304 case sendGossipChan <- struct{}{}: 305 default: 306 } 307 } 308 309 errCh := make(chan error, 1) 310 initCh := make(chan struct{}, 1) 311 // This wait group is used to allow the caller to wait until gossip 312 // processing is terminated. 313 wg.Add(1) 314 stopper.RunWorker(ctx, func(ctx context.Context) { 315 defer wg.Done() 316 317 errCh <- func() error { 318 var peerID roachpb.NodeID 319 320 initCh := initCh 321 for init := true; ; init = false { 322 reply, err := stream.Recv() 323 if err != nil { 324 return err 325 } 326 if err := c.handleResponse(ctx, g, reply); err != nil { 327 return err 328 } 329 if init { 330 initCh <- struct{}{} 331 } 332 if peerID == 0 && c.peerID != 0 { 333 peerID = c.peerID 334 g.updateClients() 335 } 336 } 337 }() 338 }) 339 340 // We attempt to defer registration of the callback until we've heard a 341 // response from the remote node which will contain the remote's high water 342 // stamps. This prevents the client from sending all of its infos to the 343 // remote (which would happen if we don't know the remote's high water 344 // stamps). Unfortunately, versions of cockroach before 2.1 did not always 345 // send a response when receiving an incoming connection, so we also start a 346 // timer and perform initialization after 1s if we haven't heard from the 347 // remote. 348 var unregister func() 349 defer func() { 350 if unregister != nil { 351 unregister() 352 } 353 }() 354 maybeRegister := func() { 355 if unregister == nil { 356 // We require redundant callbacks here as the update callback is 357 // propagating gossip infos to other nodes and needs to propagate the new 358 // expiration info. 359 unregister = g.RegisterCallback(".*", updateCallback, Redundant) 360 } 361 } 362 initTimer := time.NewTimer(time.Second) 363 defer initTimer.Stop() 364 365 for count := 0; ; { 366 select { 367 case <-c.closer: 368 return nil 369 case <-stopper.ShouldStop(): 370 return nil 371 case err := <-errCh: 372 return err 373 case <-initCh: 374 maybeRegister() 375 case <-initTimer.C: 376 maybeRegister() 377 case <-sendGossipChan: 378 if err := c.sendGossip(g, stream, count == 0); err != nil { 379 return err 380 } 381 count++ 382 } 383 } 384 }