github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/rpc/heartbeat.go (about) 1 // Copyright 2014 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package rpc 12 13 import ( 14 "context" 15 "fmt" 16 "time" 17 18 "github.com/cockroachdb/cockroach/pkg/base" 19 "github.com/cockroachdb/cockroach/pkg/clusterversion" 20 "github.com/cockroachdb/cockroach/pkg/roachpb" 21 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 22 "github.com/cockroachdb/cockroach/pkg/util/hlc" 23 "github.com/cockroachdb/cockroach/pkg/util/log" 24 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 25 "github.com/cockroachdb/cockroach/pkg/util/uuid" 26 "github.com/cockroachdb/errors" 27 ) 28 29 func (r RemoteOffset) measuredAt() time.Time { 30 return timeutil.Unix(0, r.MeasuredAt) 31 } 32 33 // String formats the RemoteOffset for human readability. 34 func (r RemoteOffset) String() string { 35 return fmt.Sprintf("off=%s, err=%s, at=%s", time.Duration(r.Offset), time.Duration(r.Uncertainty), r.measuredAt()) 36 } 37 38 // A HeartbeatService exposes a method to echo its request params. It doubles 39 // as a way to measure the offset of the server from other nodes. It uses the 40 // clock to return the server time every heartbeat. It also keeps track of 41 // remote clocks sent to it by storing them in the remoteClockMonitor. 42 type HeartbeatService struct { 43 // Provides the nanosecond unix epoch timestamp of the processor. 44 clock *hlc.Clock 45 // A pointer to the RemoteClockMonitor configured in the RPC Context, 46 // shared by rpc clients, to keep track of remote clock measurements. 47 remoteClockMonitor *RemoteClockMonitor 48 49 clusterID *base.ClusterIDContainer 50 nodeID *base.NodeIDContainer 51 settings *cluster.Settings 52 53 clusterName string 54 disableClusterNameVerification bool 55 56 // TestingAllowNamedRPCToAnonymousServer, when defined (in tests), 57 // disables errors in case a heartbeat requests a specific node ID but 58 // the remote node doesn't have a node ID yet. This testing knob is 59 // currently used by the multiTestContext which does not suitably 60 // populate separate node IDs for each heartbeat service. 61 testingAllowNamedRPCToAnonymousServer bool 62 } 63 64 func checkClusterName(clusterName string, peerName string) error { 65 if clusterName != peerName { 66 var err error 67 if clusterName == "" { 68 err = errors.Errorf("peer node expects cluster name %q, use --cluster-name to configure", peerName) 69 } else if peerName == "" { 70 err = errors.New("peer node does not have a cluster name configured, cannot use --cluster-name") 71 } else { 72 err = errors.Errorf( 73 "local cluster name %q does not match peer cluster name %q", clusterName, peerName) 74 } 75 log.Shoutf(context.Background(), log.Severity_ERROR, "%v", err) 76 return err 77 } 78 return nil 79 } 80 81 func checkVersion(ctx context.Context, st *cluster.Settings, peerVersion roachpb.Version) error { 82 activeVersion := st.Version.ActiveVersionOrEmpty(ctx) 83 if activeVersion == (clusterversion.ClusterVersion{}) { 84 // Cluster version has not yet been determined. 85 return nil 86 } 87 if peerVersion == (roachpb.Version{}) { 88 return errors.Errorf( 89 "cluster requires at least version %s, but peer did not provide a version", activeVersion) 90 } 91 if peerVersion.Less(activeVersion.Version) { 92 return errors.Errorf( 93 "cluster requires at least version %s, but peer has version %s", activeVersion, peerVersion) 94 } 95 return nil 96 } 97 98 // Ping echos the contents of the request to the response, and returns the 99 // server's current clock value, allowing the requester to measure its clock. 100 // The requester should also estimate its offset from this server along 101 // with the requester's address. 102 func (hs *HeartbeatService) Ping(ctx context.Context, args *PingRequest) (*PingResponse, error) { 103 if log.V(2) { 104 log.Infof(ctx, "received heartbeat: %+v vs local cluster %+v node %+v", args, hs.clusterID, hs.nodeID) 105 } 106 // Check that cluster IDs match. 107 clusterID := hs.clusterID.Get() 108 if args.ClusterID != nil && *args.ClusterID != uuid.Nil && clusterID != uuid.Nil { 109 // There is a cluster ID on both sides. Use that to verify the connection. 110 // 111 // Note: we could be checking the cluster name here too, however 112 // for UX reason it is better to check it on the other side (the side 113 // initiating the connection), so that the user of a newly started 114 // node gets a chance to see a cluster name mismatch as an error message 115 // on their side. 116 if *args.ClusterID != clusterID { 117 return nil, errors.Errorf( 118 "client cluster ID %q doesn't match server cluster ID %q", args.ClusterID, clusterID) 119 } 120 } 121 // Check that node IDs match. 122 var nodeID roachpb.NodeID 123 if hs.nodeID != nil { 124 nodeID = hs.nodeID.Get() 125 } 126 if args.NodeID != 0 && (!hs.testingAllowNamedRPCToAnonymousServer || nodeID != 0) && args.NodeID != nodeID { 127 // If nodeID != 0, the situation is clear (we are checking that 128 // the other side is talking to the right node). 129 // 130 // If nodeID == 0 this means that this node (serving the 131 // heartbeat) doesn't have a node ID yet. Then we can't serve 132 // connections for other nodes that want a specific node ID, 133 // however we can still serve connections that don't need a node 134 // ID, e.g. during initial gossip. 135 return nil, errors.Errorf( 136 "client requested node ID %d doesn't match server node ID %d", args.NodeID, nodeID) 137 } 138 139 // Check version compatibility. 140 if err := checkVersion(ctx, hs.settings, args.ServerVersion); err != nil { 141 return nil, errors.Wrap(err, "version compatibility check failed on ping request") 142 } 143 144 // Enforce that clock max offsets are identical between nodes. 145 // Commit suicide in the event that this is ever untrue. 146 // This check is ignored if either offset is set to 0 (for unittests). 147 // Note that we validated this connection already. Different clusters 148 // could very well have different max offsets. 149 mo, amo := hs.clock.MaxOffset(), time.Duration(args.MaxOffsetNanos) 150 if mo != 0 && amo != 0 && mo != amo { 151 panic(fmt.Sprintf("locally configured maximum clock offset (%s) "+ 152 "does not match that of node %s (%s)", mo, args.Addr, amo)) 153 } 154 155 serverOffset := args.Offset 156 // The server offset should be the opposite of the client offset. 157 serverOffset.Offset = -serverOffset.Offset 158 hs.remoteClockMonitor.UpdateOffset(ctx, args.Addr, serverOffset, 0 /* roundTripLatency */) 159 return &PingResponse{ 160 Pong: args.Ping, 161 ServerTime: hs.clock.PhysicalNow(), 162 ServerVersion: hs.settings.Version.BinaryVersion(), 163 ClusterName: hs.clusterName, 164 DisableClusterNameVerification: hs.disableClusterNameVerification, 165 }, nil 166 }