github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/rpc/heartbeat_test.go (about)

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package rpc
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"regexp"
    17  	"testing"
    18  	"time"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/base"
    21  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    22  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    23  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    24  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    25  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    26  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    27  	"github.com/cockroachdb/cockroach/pkg/util/uuid"
    28  )
    29  
    30  func TestRemoteOffsetString(t *testing.T) {
    31  	defer leaktest.AfterTest(t)()
    32  	ro := RemoteOffset{
    33  		Offset:      -501584461,
    34  		Uncertainty: 351698,
    35  		MeasuredAt:  1430348776127420269,
    36  	}
    37  	expStr := "off=-501.584461ms, err=351.698µs, at=2015-04-29 23:06:16.127420269 +0000 UTC"
    38  	if str := ro.String(); str != expStr {
    39  		t.Errorf("expected %s; got %s", expStr, str)
    40  	}
    41  }
    42  
    43  func TestHeartbeatReply(t *testing.T) {
    44  	defer leaktest.AfterTest(t)()
    45  	manual := hlc.NewManualClock(5)
    46  	clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
    47  	st := cluster.MakeTestingClusterSettings()
    48  	heartbeat := &HeartbeatService{
    49  		clock:              clock,
    50  		remoteClockMonitor: newRemoteClockMonitor(clock, time.Hour, 0),
    51  		clusterID:          &base.ClusterIDContainer{},
    52  		settings:           st,
    53  	}
    54  
    55  	request := &PingRequest{
    56  		Ping:          "testPing",
    57  		ServerVersion: st.Version.BinaryVersion(),
    58  	}
    59  	response, err := heartbeat.Ping(context.Background(), request)
    60  	if err != nil {
    61  		t.Fatal(err)
    62  	}
    63  
    64  	if response.Pong != request.Ping {
    65  		t.Errorf("expected %s to be equal to %s", response.Pong, request.Ping)
    66  	}
    67  
    68  	if response.ServerTime != 5 {
    69  		t.Errorf("expected server time 5, instead %d", response.ServerTime)
    70  	}
    71  }
    72  
    73  // A ManualHeartbeatService allows manual control of when heartbeats occur.
    74  type ManualHeartbeatService struct {
    75  	clock              *hlc.Clock
    76  	remoteClockMonitor *RemoteClockMonitor
    77  	settings           *cluster.Settings
    78  	nodeID             *base.NodeIDContainer
    79  	// Heartbeats are processed when a value is sent here.
    80  	ready   chan error
    81  	stopper *stop.Stopper
    82  }
    83  
    84  // Ping waits until the heartbeat service is ready to respond to a Heartbeat.
    85  func (mhs *ManualHeartbeatService) Ping(
    86  	ctx context.Context, args *PingRequest,
    87  ) (*PingResponse, error) {
    88  	select {
    89  	case err := <-mhs.ready:
    90  		if err != nil {
    91  			return nil, err
    92  		}
    93  	case <-ctx.Done():
    94  		return nil, ctx.Err()
    95  	case <-mhs.stopper.ShouldStop():
    96  	}
    97  	hs := HeartbeatService{
    98  		clock:              mhs.clock,
    99  		remoteClockMonitor: mhs.remoteClockMonitor,
   100  		clusterID:          &base.ClusterIDContainer{},
   101  		settings:           mhs.settings,
   102  		nodeID:             mhs.nodeID,
   103  	}
   104  	return hs.Ping(ctx, args)
   105  }
   106  
   107  func TestManualHeartbeat(t *testing.T) {
   108  	defer leaktest.AfterTest(t)()
   109  	manual := hlc.NewManualClock(5)
   110  	clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
   111  	st := cluster.MakeTestingClusterSettings()
   112  	manualHeartbeat := &ManualHeartbeatService{
   113  		clock:              clock,
   114  		remoteClockMonitor: newRemoteClockMonitor(clock, time.Hour, 0),
   115  		ready:              make(chan error, 1),
   116  		settings:           st,
   117  	}
   118  	regularHeartbeat := &HeartbeatService{
   119  		clock:              clock,
   120  		remoteClockMonitor: newRemoteClockMonitor(clock, time.Hour, 0),
   121  		clusterID:          &base.ClusterIDContainer{},
   122  		settings:           st,
   123  	}
   124  
   125  	request := &PingRequest{
   126  		Ping:          "testManual",
   127  		ServerVersion: st.Version.BinaryVersion(),
   128  	}
   129  	manualHeartbeat.ready <- nil
   130  	ctx := context.Background()
   131  	regularResponse, err := regularHeartbeat.Ping(ctx, request)
   132  	if err != nil {
   133  		t.Fatal(err)
   134  	}
   135  	manualResponse, err := manualHeartbeat.Ping(ctx, request)
   136  	if err != nil {
   137  		t.Fatal(err)
   138  	}
   139  
   140  	// Ensure that the response is the same as with a normal heartbeat.
   141  	if manualResponse.Pong != regularResponse.Pong {
   142  		t.Errorf("expected pong %s, instead %s",
   143  			manualResponse.Pong, regularResponse.Pong)
   144  	}
   145  	if manualResponse.ServerTime != regularResponse.ServerTime {
   146  		t.Errorf("expected ServerTime %d, instead %d",
   147  			manualResponse.ServerTime, regularResponse.ServerTime)
   148  	}
   149  }
   150  
   151  func TestClockOffsetMismatch(t *testing.T) {
   152  	defer leaktest.AfterTest(t)()
   153  	defer func() {
   154  		if r := recover(); r != nil {
   155  			fmt.Println(r)
   156  			if match, _ := regexp.MatchString("locally configured maximum clock offset", r.(string)); !match {
   157  				t.Errorf("expected clock mismatch error")
   158  			}
   159  		}
   160  	}()
   161  
   162  	ctx := context.Background()
   163  
   164  	clock := hlc.NewClock(hlc.UnixNano, 250*time.Millisecond)
   165  	st := cluster.MakeTestingClusterSettings()
   166  	hs := &HeartbeatService{
   167  		clock:              clock,
   168  		remoteClockMonitor: newRemoteClockMonitor(clock, time.Hour, 0),
   169  		clusterID:          &base.ClusterIDContainer{},
   170  		settings:           st,
   171  	}
   172  	hs.clusterID.Set(ctx, uuid.Nil)
   173  
   174  	request := &PingRequest{
   175  		Ping:           "testManual",
   176  		Addr:           "test",
   177  		MaxOffsetNanos: (500 * time.Millisecond).Nanoseconds(),
   178  		ServerVersion:  st.Version.BinaryVersion(),
   179  	}
   180  	response, err := hs.Ping(context.Background(), request)
   181  	t.Fatalf("should not have reached but got response=%v err=%v", response, err)
   182  }
   183  
   184  func TestClusterIDCompare(t *testing.T) {
   185  	defer leaktest.AfterTest(t)()
   186  	uuid1, uuid2 := uuid.MakeV4(), uuid.MakeV4()
   187  	testData := []struct {
   188  		name            string
   189  		serverClusterID uuid.UUID
   190  		clientClusterID uuid.UUID
   191  		expectError     bool
   192  	}{
   193  		{"cluster IDs match", uuid1, uuid1, false},
   194  		{"their cluster ID missing", uuid1, uuid.Nil, false},
   195  		{"our cluster ID missing", uuid.Nil, uuid1, false},
   196  		{"both cluster IDs missing", uuid.Nil, uuid.Nil, false},
   197  		{"cluster ID mismatch", uuid1, uuid2, true},
   198  	}
   199  
   200  	manual := hlc.NewManualClock(5)
   201  	clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
   202  	st := cluster.MakeTestingClusterSettings()
   203  	heartbeat := &HeartbeatService{
   204  		clock:              clock,
   205  		remoteClockMonitor: newRemoteClockMonitor(clock, time.Hour, 0),
   206  		clusterID:          &base.ClusterIDContainer{},
   207  		settings:           st,
   208  	}
   209  
   210  	for _, td := range testData {
   211  		t.Run(td.name, func(t *testing.T) {
   212  			heartbeat.clusterID.Reset(td.serverClusterID)
   213  			request := &PingRequest{
   214  				Ping:          "testPing",
   215  				ClusterID:     &td.clientClusterID,
   216  				ServerVersion: st.Version.BinaryVersion(),
   217  			}
   218  			_, err := heartbeat.Ping(context.Background(), request)
   219  			if td.expectError && err == nil {
   220  				t.Error("expected cluster ID mismatch error")
   221  			}
   222  			if !td.expectError && err != nil {
   223  				t.Errorf("unexpected error: %s", err)
   224  			}
   225  		})
   226  	}
   227  }
   228  
   229  func TestNodeIDCompare(t *testing.T) {
   230  	defer leaktest.AfterTest(t)()
   231  	testData := []struct {
   232  		name         string
   233  		serverNodeID roachpb.NodeID
   234  		clientNodeID roachpb.NodeID
   235  		expectError  bool
   236  	}{
   237  		{"node IDs match", 1, 1, false},
   238  		{"their node ID missing", 1, 0, false},
   239  		{"our node ID missing", 0, 1, true},
   240  		{"both node IDs missing", 0, 0, false},
   241  		{"node ID mismatch", 1, 2, true},
   242  	}
   243  
   244  	manual := hlc.NewManualClock(5)
   245  	clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
   246  	st := cluster.MakeTestingClusterSettings()
   247  	heartbeat := &HeartbeatService{
   248  		clock:              clock,
   249  		remoteClockMonitor: newRemoteClockMonitor(clock, time.Hour, 0),
   250  		clusterID:          &base.ClusterIDContainer{},
   251  		nodeID:             &base.NodeIDContainer{},
   252  		settings:           st,
   253  	}
   254  
   255  	for _, td := range testData {
   256  		t.Run(td.name, func(t *testing.T) {
   257  			heartbeat.nodeID.Reset(td.serverNodeID)
   258  			request := &PingRequest{
   259  				Ping:          "testPing",
   260  				NodeID:        td.clientNodeID,
   261  				ServerVersion: st.Version.BinaryVersion(),
   262  			}
   263  			_, err := heartbeat.Ping(context.Background(), request)
   264  			if td.expectError && err == nil {
   265  				t.Error("expected node ID mismatch error")
   266  			}
   267  			if !td.expectError && err != nil {
   268  				t.Errorf("unexpected error: %s", err)
   269  			}
   270  		})
   271  	}
   272  }
   273  
   274  // HeartbeatStreamService is like HeartbeatService, but it implements the
   275  // TestingHeartbeatStreamServer interface in addition to the HeartbeatServer
   276  // interface. Instead of providing a request-response model, the service reads
   277  // on its input stream and periodically sends on its output stream with its
   278  // latest ping response. This means that even if the service stops receiving
   279  // requests, it will continue to send responses.
   280  type HeartbeatStreamService struct {
   281  	HeartbeatService
   282  	interval time.Duration
   283  }
   284  
   285  func (hss *HeartbeatStreamService) PingStream(
   286  	stream TestingHeartbeatStream_PingStreamServer,
   287  ) error {
   288  	ctx := stream.Context()
   289  
   290  	// Launch a goroutine to read from the stream and construct responses.
   291  	respC := make(chan *PingResponse)
   292  	recvErrC := make(chan error, 1)
   293  	sendErrC := make(chan struct{})
   294  	go func() {
   295  		for {
   296  			ping, err := stream.Recv()
   297  			if err != nil {
   298  				recvErrC <- err
   299  				return
   300  			}
   301  			resp, err := hss.Ping(ctx, ping)
   302  			if err != nil {
   303  				recvErrC <- err
   304  				return
   305  			}
   306  			select {
   307  			case respC <- resp:
   308  				continue
   309  			case <-sendErrC:
   310  				return
   311  			}
   312  		}
   313  	}()
   314  
   315  	// Launch a timer to periodically send the ping responses, even if the
   316  	// response has not been updated.
   317  	t := time.NewTicker(hss.interval)
   318  	defer t.Stop()
   319  
   320  	var resp *PingResponse
   321  	for {
   322  		select {
   323  		case <-t.C:
   324  			if resp != nil {
   325  				err := stream.Send(resp)
   326  				if err != nil {
   327  					close(sendErrC)
   328  					return err
   329  				}
   330  			}
   331  		case resp = <-respC:
   332  			// Update resp.
   333  		case err := <-recvErrC:
   334  			return err
   335  		}
   336  	}
   337  }
   338  
   339  // lockedPingStreamClient is an implementation of
   340  // HeartbeatStream_PingStreamClient which provides support for concurrent calls
   341  // to Send. Note that the default implementation of grpc.Stream for server
   342  // responses (grpc.serverStream) is not safe for concurrent calls to Send.
   343  type lockedPingStreamClient struct {
   344  	TestingHeartbeatStream_PingStreamClient
   345  	sendMu syncutil.Mutex
   346  }
   347  
   348  func (c *lockedPingStreamClient) Send(req *PingRequest) error {
   349  	c.sendMu.Lock()
   350  	defer c.sendMu.Unlock()
   351  	return c.TestingHeartbeatStream_PingStreamClient.Send(req)
   352  }