github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/gossip/client.go (about)

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package gossip
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"net"
    17  	"sync"
    18  	"time"
    19  
    20  	circuit "github.com/cockroachdb/circuitbreaker"
    21  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    22  	"github.com/cockroachdb/cockroach/pkg/rpc"
    23  	"github.com/cockroachdb/cockroach/pkg/util"
    24  	"github.com/cockroachdb/cockroach/pkg/util/grpcutil"
    25  	"github.com/cockroachdb/cockroach/pkg/util/log"
    26  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    27  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    28  	"github.com/cockroachdb/errors"
    29  )
    30  
// client is a client-side RPC connection to a gossip peer node.
type client struct {
	log.AmbientContext

	createdAt             time.Time                // Time at which newClient constructed this client
	peerID                roachpb.NodeID           // Peer node ID; 0 until first gossip response
	resolvedPlaceholder   bool                     // Whether we've resolved the nodeSet's placeholder for this client
	addr                  net.Addr                 // Peer node network address
	forwardAddr           *util.UnresolvedAddr     // Set if disconnected with an alternate addr
	remoteHighWaterStamps map[roachpb.NodeID]int64 // Remote server's high water timestamps
	closer                chan struct{}            // Client shutdown channel
	clientMetrics         Metrics                  // Counters for this client only; created by makeMetrics in newClient
	nodeMetrics           Metrics                  // Counters supplied by newClient's caller; updated alongside clientMetrics
}
    45  
    46  // extractKeys returns a string representation of a gossip delta's keys.
    47  func extractKeys(delta map[string]*Info) string {
    48  	keys := make([]string, 0, len(delta))
    49  	for key := range delta {
    50  		keys = append(keys, key)
    51  	}
    52  	return fmt.Sprintf("%s", keys)
    53  }
    54  
    55  // newClient creates and returns a client struct.
    56  func newClient(ambient log.AmbientContext, addr net.Addr, nodeMetrics Metrics) *client {
    57  	return &client{
    58  		AmbientContext:        ambient,
    59  		createdAt:             timeutil.Now(),
    60  		addr:                  addr,
    61  		remoteHighWaterStamps: map[roachpb.NodeID]int64{},
    62  		closer:                make(chan struct{}),
    63  		clientMetrics:         makeMetrics(),
    64  		nodeMetrics:           nodeMetrics,
    65  	}
    66  }
    67  
// startLocked dials the remote addr and commences gossip once connected. Upon
// exit, the client is sent on the disconnected channel. This method starts
// client processing in a goroutine and returns immediately.
//
// The dial, stream creation and initial gossip request are all executed
// through breaker.Call, so a failure in any of them is reported to the
// circuit breaker.
//
// NOTE(review): the "Locked" suffix suggests the caller is expected to hold
// g.mu while calling this; that is not visible from this function -- confirm
// against the callers.
func (c *client) startLocked(
	g *Gossip,
	disconnected chan *client,
	rpcCtx *rpc.Context,
	stopper *stop.Stopper,
	breaker *circuit.Breaker,
) {
	// Add a placeholder for the new outgoing connection because we may not know
	// the ID of the node we're connecting to yet. This will be resolved in
	// (*client).handleResponse once we know the ID.
	g.outgoing.addPlaceholder()

	// The cancelable context is shared by the dial, the stream and the gossip
	// loop; it is canceled when the worker below exits.
	ctx, cancel := context.WithCancel(c.AnnotateCtx(context.Background()))
	stopper.RunWorker(ctx, func(ctx context.Context) {
		// wg is handed to c.gossip, which adds to it for the receive worker it
		// starts.
		var wg sync.WaitGroup
		defer func() {
			// This closes the outgoing stream, causing any attempt to send or
			// receive to return an error.
			//
			// Note: it is still possible for incoming gossip to be processed after
			// this point.
			cancel()

			// The stream is closed, but there may still be some incoming gossip
			// being processed. Wait until that is complete to avoid racing the
			// client's removal against the discovery of its remote's node ID.
			wg.Wait()
			disconnected <- c
		}()

		// Sample the failure count before the call so that the warning below is
		// only logged for the first failure in a run of consecutive failures.
		consecFailures := breaker.ConsecFailures()
		var stream Gossip_GossipClient
		if err := breaker.Call(func() error {
			// Note: avoid using `grpc.WithBlock` here. This code is already
			// asynchronous from the caller's perspective, so the only effect of
			// `WithBlock` here is blocking shutdown - at the time of this writing,
			// that ends up making `kv` tests take twice as long.
			conn, err := rpcCtx.GRPCUnvalidatedDial(c.addr.String()).Connect(ctx)
			if err != nil {
				return err
			}
			if stream, err = NewGossipClient(conn).Gossip(ctx); err != nil {
				return err
			}
			return c.requestGossip(g, stream)
		}, 0); err != nil {
			if consecFailures == 0 {
				log.Warningf(ctx, "failed to start gossip client to %s: %s", c.addr, err)
			}
			return
		}

		// Start gossiping.
		log.Infof(ctx, "started gossip client to %s", c.addr)
		if err := c.gossip(ctx, g, stream, stopper, &wg); err != nil {
			// Closed-connection errors are not logged.
			if !grpcutil.IsClosedConnection(err) {
				// c.peerID is written by handleResponse while holding g.mu, so it
				// is read under the read lock here.
				g.mu.RLock()
				if c.peerID != 0 {
					log.Infof(ctx, "closing client to n%d (%s): %s", c.peerID, c.addr, err)
				} else {
					log.Infof(ctx, "closing client to %s: %s", c.addr, err)
				}
				g.mu.RUnlock()
			}
		}
	})
}
   138  
   139  // close stops the client gossip loop and returns immediately.
   140  func (c *client) close() {
   141  	select {
   142  	case <-c.closer:
   143  	default:
   144  		close(c.closer)
   145  	}
   146  }
   147  
   148  // requestGossip requests the latest gossip from the remote server by
   149  // supplying a map of this node's knowledge of other nodes' high water
   150  // timestamps.
   151  func (c *client) requestGossip(g *Gossip, stream Gossip_GossipClient) error {
   152  	g.mu.RLock()
   153  	args := &Request{
   154  		NodeID:          g.NodeID.Get(),
   155  		Addr:            g.mu.is.NodeAddr,
   156  		HighWaterStamps: g.mu.is.getHighWaterStamps(),
   157  		ClusterID:       g.clusterID.Get(),
   158  	}
   159  	g.mu.RUnlock()
   160  
   161  	bytesSent := int64(args.Size())
   162  	c.clientMetrics.BytesSent.Inc(bytesSent)
   163  	c.nodeMetrics.BytesSent.Inc(bytesSent)
   164  
   165  	return stream.Send(args)
   166  }
   167  
   168  // sendGossip sends the latest gossip to the remote server, based on
   169  // the remote server's notion of other nodes' high water timestamps.
   170  func (c *client) sendGossip(g *Gossip, stream Gossip_GossipClient, firstReq bool) error {
   171  	g.mu.Lock()
   172  	delta := g.mu.is.delta(c.remoteHighWaterStamps)
   173  	if firstReq {
   174  		g.mu.is.populateMostDistantMarkers(delta)
   175  	}
   176  	if len(delta) > 0 {
   177  		// Ensure that the high water stamps for the remote server are kept up to
   178  		// date so that we avoid resending the same gossip infos as infos are
   179  		// updated locally.
   180  		for _, i := range delta {
   181  			ratchetHighWaterStamp(c.remoteHighWaterStamps, i.NodeID, i.OrigStamp)
   182  		}
   183  
   184  		args := Request{
   185  			NodeID:          g.NodeID.Get(),
   186  			Addr:            g.mu.is.NodeAddr,
   187  			Delta:           delta,
   188  			HighWaterStamps: g.mu.is.getHighWaterStamps(),
   189  			ClusterID:       g.clusterID.Get(),
   190  		}
   191  
   192  		bytesSent := int64(args.Size())
   193  		infosSent := int64(len(delta))
   194  		c.clientMetrics.BytesSent.Inc(bytesSent)
   195  		c.clientMetrics.InfosSent.Inc(infosSent)
   196  		c.nodeMetrics.BytesSent.Inc(bytesSent)
   197  		c.nodeMetrics.InfosSent.Inc(infosSent)
   198  
   199  		if log.V(1) {
   200  			ctx := c.AnnotateCtx(stream.Context())
   201  			if c.peerID != 0 {
   202  				log.Infof(ctx, "sending %s to n%d (%s)", extractKeys(args.Delta), c.peerID, c.addr)
   203  			} else {
   204  				log.Infof(ctx, "sending %s to %s", extractKeys(args.Delta), c.addr)
   205  			}
   206  		}
   207  
   208  		g.mu.Unlock()
   209  		return stream.Send(&args)
   210  	}
   211  	g.mu.Unlock()
   212  	return nil
   213  }
   214  
   215  // handleResponse handles errors, remote forwarding, and combines delta
   216  // gossip infos from the remote server with this node's infostore.
   217  func (c *client) handleResponse(ctx context.Context, g *Gossip, reply *Response) error {
   218  	g.mu.Lock()
   219  	defer g.mu.Unlock()
   220  
   221  	bytesReceived := int64(reply.Size())
   222  	infosReceived := int64(len(reply.Delta))
   223  	c.clientMetrics.BytesReceived.Inc(bytesReceived)
   224  	c.clientMetrics.InfosReceived.Inc(infosReceived)
   225  	c.nodeMetrics.BytesReceived.Inc(bytesReceived)
   226  	c.nodeMetrics.InfosReceived.Inc(infosReceived)
   227  
   228  	// Combine remote node's infostore delta with ours.
   229  	if reply.Delta != nil {
   230  		freshCount, err := g.mu.is.combine(reply.Delta, reply.NodeID)
   231  		if err != nil {
   232  			log.Warningf(ctx, "failed to fully combine delta from n%d: %s", reply.NodeID, err)
   233  		}
   234  		if infoCount := len(reply.Delta); infoCount > 0 {
   235  			if log.V(1) {
   236  				log.Infof(ctx, "received %s from n%d (%d fresh)", extractKeys(reply.Delta), reply.NodeID, freshCount)
   237  			}
   238  		}
   239  		g.maybeTightenLocked()
   240  	}
   241  	c.peerID = reply.NodeID
   242  	mergeHighWaterStamps(&c.remoteHighWaterStamps, reply.HighWaterStamps)
   243  
   244  	// If we haven't yet recorded which node ID we're connected to in the outgoing
   245  	// nodeSet, do so now. Note that we only want to do this if the peer has a
   246  	// node ID allocated (i.e. if it's nonzero), because otherwise it could change
   247  	// after we record it.
   248  	if !c.resolvedPlaceholder && c.peerID != 0 {
   249  		c.resolvedPlaceholder = true
   250  		g.outgoing.resolvePlaceholder(c.peerID)
   251  	}
   252  
   253  	// Handle remote forwarding.
   254  	if reply.AlternateAddr != nil {
   255  		if g.hasIncomingLocked(reply.AlternateNodeID) || g.hasOutgoingLocked(reply.AlternateNodeID) {
   256  			return errors.Errorf(
   257  				"received forward from n%d to n%d (%s); already have active connection, skipping",
   258  				reply.NodeID, reply.AlternateNodeID, reply.AlternateAddr)
   259  		}
   260  		// We try to resolve the address, but don't actually use the result.
   261  		// The certificates (if any) may only be valid for the unresolved
   262  		// address.
   263  		if _, err := reply.AlternateAddr.Resolve(); err != nil {
   264  			return errors.Errorf("unable to resolve alternate address %s for n%d: %s",
   265  				reply.AlternateAddr, reply.AlternateNodeID, err)
   266  		}
   267  		c.forwardAddr = reply.AlternateAddr
   268  		return errors.Errorf("received forward from n%d to %d (%s)",
   269  			reply.NodeID, reply.AlternateNodeID, reply.AlternateAddr)
   270  	}
   271  
   272  	// Check whether we're connected at this point.
   273  	g.signalConnectedLocked()
   274  
   275  	// Check whether this outgoing client is duplicating work already
   276  	// being done by an incoming client, either because an outgoing
   277  	// matches an incoming or the client is connecting to itself.
   278  	if nodeID := g.NodeID.Get(); nodeID == c.peerID {
   279  		return errors.Errorf("stopping outgoing client to n%d (%s); loopback connection", c.peerID, c.addr)
   280  	} else if g.hasIncomingLocked(c.peerID) && nodeID > c.peerID {
   281  		// To avoid mutual shutdown, we only shutdown our client if our
   282  		// node ID is higher than the peer's.
   283  		return errors.Errorf("stopping outgoing client to n%d (%s); already have incoming", c.peerID, c.addr)
   284  	}
   285  
   286  	return nil
   287  }
   288  
// gossip loops, sending deltas of the infostore and receiving deltas
// in turn. Replies are received and handled by a separate worker, whose
// terminating error is returned from this method. If an alternate is
// proposed on response, handleResponse records it in c.forwardAddr and
// returns an error, which causes this method to return that error.
//
// The method returns nil when the client is closed or the stopper is
// stopping, and otherwise the first error from receiving/handling a reply
// or from sending gossip. wg is incremented for the receive worker so that
// the caller can wait for it to finish.
func (c *client) gossip(
	ctx context.Context,
	g *Gossip,
	stream Gossip_GossipClient,
	stopper *stop.Stopper,
	wg *sync.WaitGroup,
) error {
	// Capacity 1 plus the non-blocking send in updateCallback coalesces any
	// number of pending updates into a single wake-up of the send loop.
	sendGossipChan := make(chan struct{}, 1)

	// Register a callback for gossip updates.
	updateCallback := func(_ string, _ roachpb.Value) {
		select {
		case sendGossipChan <- struct{}{}:
		default:
		}
	}

	// Both channels are buffered so that the receive worker can deliver its
	// single error and its single init signal without blocking, even if this
	// method has already returned.
	errCh := make(chan error, 1)
	initCh := make(chan struct{}, 1)
	// This wait group is used to allow the caller to wait until gossip
	// processing is terminated.
	wg.Add(1)
	stopper.RunWorker(ctx, func(ctx context.Context) {
		defer wg.Done()

		errCh <- func() error {
			// Local copy of the peer ID, used to call g.updateClients only once,
			// the first time c.peerID becomes nonzero.
			var peerID roachpb.NodeID

			// Local alias of the enclosing initCh (same channel value).
			initCh := initCh
			// init is true only for the first iteration, i.e. the first reply.
			for init := true; ; init = false {
				reply, err := stream.Recv()
				if err != nil {
					return err
				}
				if err := c.handleResponse(ctx, g, reply); err != nil {
					return err
				}
				if init {
					// Tell the main loop that the first reply has been handled.
					initCh <- struct{}{}
				}
				if peerID == 0 && c.peerID != 0 {
					peerID = c.peerID
					g.updateClients()
				}
			}
		}()
	})

	// We attempt to defer registration of the callback until we've heard a
	// response from the remote node which will contain the remote's high water
	// stamps. This prevents the client from sending all of its infos to the
	// remote (which would happen if we don't know the remote's high water
	// stamps). Unfortunately, versions of cockroach before 2.1 did not always
	// send a response when receiving an incoming connection, so we also start a
	// timer and perform initialization after 1s if we haven't heard from the
	// remote.
	var unregister func()
	defer func() {
		if unregister != nil {
			unregister()
		}
	}()
	// maybeRegister registers updateCallback at most once; it may be triggered
	// by either the init signal or the timer below.
	maybeRegister := func() {
		if unregister == nil {
			// We require redundant callbacks here as the update callback is
			// propagating gossip infos to other nodes and needs to propagate the new
			// expiration info.
			unregister = g.RegisterCallback(".*", updateCallback, Redundant)
		}
	}
	initTimer := time.NewTimer(time.Second)
	defer initTimer.Stop()

	// count is the number of successful sendGossip calls so far; it is zero
	// only for the first send, which is passed to sendGossip as firstReq.
	for count := 0; ; {
		select {
		case <-c.closer:
			return nil
		case <-stopper.ShouldStop():
			return nil
		case err := <-errCh:
			return err
		case <-initCh:
			maybeRegister()
		case <-initTimer.C:
			maybeRegister()
		case <-sendGossipChan:
			if err := c.sendGossip(g, stream, count == 0); err != nil {
				return err
			}
			count++
		}
	}
}