github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/rpc/heartbeat.go (about)

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package rpc
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"time"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/base"
    19  	"github.com/cockroachdb/cockroach/pkg/clusterversion"
    20  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    21  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    22  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    23  	"github.com/cockroachdb/cockroach/pkg/util/log"
    24  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    25  	"github.com/cockroachdb/cockroach/pkg/util/uuid"
    26  	"github.com/cockroachdb/errors"
    27  )
    28  
    29  func (r RemoteOffset) measuredAt() time.Time {
    30  	return timeutil.Unix(0, r.MeasuredAt)
    31  }
    32  
    33  // String formats the RemoteOffset for human readability.
    34  func (r RemoteOffset) String() string {
    35  	return fmt.Sprintf("off=%s, err=%s, at=%s", time.Duration(r.Offset), time.Duration(r.Uncertainty), r.measuredAt())
    36  }
    37  
// A HeartbeatService exposes a method to echo its request params. It doubles
// as a way to measure the offset of the server from other nodes. It uses the
// clock to return the server time every heartbeat. It also keeps track of
// remote clocks sent to it by storing them in the remoteClockMonitor.
type HeartbeatService struct {
	// Provides the nanosecond unix epoch timestamp of the processor. Ping
	// also reads the locally configured maximum clock offset from it.
	clock *hlc.Clock
	// A pointer to the RemoteClockMonitor configured in the RPC Context,
	// shared by rpc clients, to keep track of remote clock measurements.
	remoteClockMonitor *RemoteClockMonitor

	// clusterID is the local cluster ID that Ping compares against the
	// cluster ID carried by an incoming request.
	clusterID *base.ClusterIDContainer
	// nodeID is the local node ID that Ping compares against the node ID
	// requested by the client. Ping tolerates a nil container and then
	// treats the local node ID as zero.
	nodeID *base.NodeIDContainer
	// settings supplies the active cluster version used to vet the peer's
	// version, and the binary version reported back in the response.
	settings *cluster.Settings

	// clusterName and disableClusterNameVerification are copied verbatim
	// into every PingResponse; Ping itself does not verify the name.
	clusterName                    string
	disableClusterNameVerification bool

	// TestingAllowNamedRPCToAnonymousServer, when defined (in tests),
	// disables errors in case a heartbeat requests a specific node ID but
	// the remote node doesn't have a node ID yet. This testing knob is
	// currently used by the multiTestContext which does not suitably
	// populate separate node IDs for each heartbeat service.
	testingAllowNamedRPCToAnonymousServer bool
}
    63  
    64  func checkClusterName(clusterName string, peerName string) error {
    65  	if clusterName != peerName {
    66  		var err error
    67  		if clusterName == "" {
    68  			err = errors.Errorf("peer node expects cluster name %q, use --cluster-name to configure", peerName)
    69  		} else if peerName == "" {
    70  			err = errors.New("peer node does not have a cluster name configured, cannot use --cluster-name")
    71  		} else {
    72  			err = errors.Errorf(
    73  				"local cluster name %q does not match peer cluster name %q", clusterName, peerName)
    74  		}
    75  		log.Shoutf(context.Background(), log.Severity_ERROR, "%v", err)
    76  		return err
    77  	}
    78  	return nil
    79  }
    80  
    81  func checkVersion(ctx context.Context, st *cluster.Settings, peerVersion roachpb.Version) error {
    82  	activeVersion := st.Version.ActiveVersionOrEmpty(ctx)
    83  	if activeVersion == (clusterversion.ClusterVersion{}) {
    84  		// Cluster version has not yet been determined.
    85  		return nil
    86  	}
    87  	if peerVersion == (roachpb.Version{}) {
    88  		return errors.Errorf(
    89  			"cluster requires at least version %s, but peer did not provide a version", activeVersion)
    90  	}
    91  	if peerVersion.Less(activeVersion.Version) {
    92  		return errors.Errorf(
    93  			"cluster requires at least version %s, but peer has version %s", activeVersion, peerVersion)
    94  	}
    95  	return nil
    96  }
    97  
    98  // Ping echos the contents of the request to the response, and returns the
    99  // server's current clock value, allowing the requester to measure its clock.
   100  // The requester should also estimate its offset from this server along
   101  // with the requester's address.
   102  func (hs *HeartbeatService) Ping(ctx context.Context, args *PingRequest) (*PingResponse, error) {
   103  	if log.V(2) {
   104  		log.Infof(ctx, "received heartbeat: %+v vs local cluster %+v node %+v", args, hs.clusterID, hs.nodeID)
   105  	}
   106  	// Check that cluster IDs match.
   107  	clusterID := hs.clusterID.Get()
   108  	if args.ClusterID != nil && *args.ClusterID != uuid.Nil && clusterID != uuid.Nil {
   109  		// There is a cluster ID on both sides. Use that to verify the connection.
   110  		//
   111  		// Note: we could be checking the cluster name here too, however
   112  		// for UX reason it is better to check it on the other side (the side
   113  		// initiating the connection), so that the user of a newly started
   114  		// node gets a chance to see a cluster name mismatch as an error message
   115  		// on their side.
   116  		if *args.ClusterID != clusterID {
   117  			return nil, errors.Errorf(
   118  				"client cluster ID %q doesn't match server cluster ID %q", args.ClusterID, clusterID)
   119  		}
   120  	}
   121  	// Check that node IDs match.
   122  	var nodeID roachpb.NodeID
   123  	if hs.nodeID != nil {
   124  		nodeID = hs.nodeID.Get()
   125  	}
   126  	if args.NodeID != 0 && (!hs.testingAllowNamedRPCToAnonymousServer || nodeID != 0) && args.NodeID != nodeID {
   127  		// If nodeID != 0, the situation is clear (we are checking that
   128  		// the other side is talking to the right node).
   129  		//
   130  		// If nodeID == 0 this means that this node (serving the
   131  		// heartbeat) doesn't have a node ID yet. Then we can't serve
   132  		// connections for other nodes that want a specific node ID,
   133  		// however we can still serve connections that don't need a node
   134  		// ID, e.g. during initial gossip.
   135  		return nil, errors.Errorf(
   136  			"client requested node ID %d doesn't match server node ID %d", args.NodeID, nodeID)
   137  	}
   138  
   139  	// Check version compatibility.
   140  	if err := checkVersion(ctx, hs.settings, args.ServerVersion); err != nil {
   141  		return nil, errors.Wrap(err, "version compatibility check failed on ping request")
   142  	}
   143  
   144  	// Enforce that clock max offsets are identical between nodes.
   145  	// Commit suicide in the event that this is ever untrue.
   146  	// This check is ignored if either offset is set to 0 (for unittests).
   147  	// Note that we validated this connection already. Different clusters
   148  	// could very well have different max offsets.
   149  	mo, amo := hs.clock.MaxOffset(), time.Duration(args.MaxOffsetNanos)
   150  	if mo != 0 && amo != 0 && mo != amo {
   151  		panic(fmt.Sprintf("locally configured maximum clock offset (%s) "+
   152  			"does not match that of node %s (%s)", mo, args.Addr, amo))
   153  	}
   154  
   155  	serverOffset := args.Offset
   156  	// The server offset should be the opposite of the client offset.
   157  	serverOffset.Offset = -serverOffset.Offset
   158  	hs.remoteClockMonitor.UpdateOffset(ctx, args.Addr, serverOffset, 0 /* roundTripLatency */)
   159  	return &PingResponse{
   160  		Pong:                           args.Ping,
   161  		ServerTime:                     hs.clock.PhysicalNow(),
   162  		ServerVersion:                  hs.settings.Version.BinaryVersion(),
   163  		ClusterName:                    hs.clusterName,
   164  		DisableClusterNameVerification: hs.disableClusterNameVerification,
   165  	}, nil
   166  }