github.com/google/cloudprober@v0.11.3/probes/grpc/grpc_test.go (about)

     1  package grpc
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"net"
     7  	"reflect"
     8  	"strconv"
     9  	"sync"
    10  	"testing"
    11  	"time"
    12  
    13  	"github.com/golang/protobuf/proto"
    14  	"github.com/google/cloudprober/logger"
    15  	"github.com/google/cloudprober/metrics"
    16  	"github.com/google/cloudprober/metrics/testutils"
    17  	"github.com/google/cloudprober/probes/options"
    18  	probepb "github.com/google/cloudprober/probes/proto"
    19  	grpcpb "github.com/google/cloudprober/servers/grpc/proto"
    20  	spb "github.com/google/cloudprober/servers/grpc/proto"
    21  	"github.com/google/cloudprober/targets"
    22  	"github.com/google/cloudprober/targets/endpoint"
    23  	"github.com/google/cloudprober/targets/resolver"
    24  	"google.golang.org/grpc"
    25  )
    26  
    27  var once sync.Once
    28  var srvAddr string
    29  var baseProbeConf = `
    30  name: "grpc"
    31  type: GRPC
    32  targets {
    33  	host_names: "%s"
    34  }
    35  interval_msec: 1000
    36  timeout_msec: %d
    37  grpc_probe {
    38  	%s
    39  	num_conns: %d
    40  	connect_timeout_msec: 2000
    41  }
    42  `
    43  
    44  func probeCfg(tgts, cred string, timeout, numConns int) (*probepb.ProbeDef, error) {
    45  	conf := fmt.Sprintf(baseProbeConf, tgts, timeout, cred, numConns)
    46  	cfg := &probepb.ProbeDef{}
    47  	err := proto.UnmarshalText(conf, cfg)
    48  	return cfg, err
    49  }
    50  
    51  type Server struct {
    52  	delay time.Duration
    53  	msg   []byte
    54  }
    55  
    56  // Echo reflects back the incoming message.
    57  // TODO: return error if EchoMessage is greater than maxMsgSize.
    58  func (s *Server) Echo(ctx context.Context, req *spb.EchoMessage) (*spb.EchoMessage, error) {
    59  	if s.delay > 0 {
    60  		time.Sleep(s.delay)
    61  	}
    62  	return req, nil
    63  }
    64  
    65  // BlobRead returns a blob of data.
    66  func (s *Server) BlobRead(ctx context.Context, req *spb.BlobReadRequest) (*spb.BlobReadResponse, error) {
    67  	return &spb.BlobReadResponse{
    68  		Blob: s.msg[0:req.GetSize()],
    69  	}, nil
    70  }
    71  
    72  // ServerStatus returns the current server status.
    73  func (s *Server) ServerStatus(ctx context.Context, req *spb.StatusRequest) (*spb.StatusResponse, error) {
    74  	return &spb.StatusResponse{
    75  		UptimeUs: proto.Int64(42),
    76  	}, nil
    77  }
    78  
    79  // BlobWrite returns the size of blob in the WriteRequest. It does not operate
    80  // on the blob.
    81  func (s *Server) BlobWrite(ctx context.Context, req *spb.BlobWriteRequest) (*spb.BlobWriteResponse, error) {
    82  	return &spb.BlobWriteResponse{
    83  		Size: proto.Int32(int32(len(req.Blob))),
    84  	}, nil
    85  }
    86  
    87  // globalGRPCServer sets up runconfig and returns a gRPC server.
    88  func globalGRPCServer() (string, error) {
    89  	var err error
    90  	once.Do(func() {
    91  		var ln net.Listener
    92  		ln, err = net.Listen("tcp", "localhost:0")
    93  		if err != nil {
    94  			return
    95  		}
    96  		grpcSrv := grpc.NewServer()
    97  		srv := &Server{delay: time.Second / 2, msg: make([]byte, 1024)}
    98  		grpcpb.RegisterProberServer(grpcSrv, srv)
    99  		go grpcSrv.Serve(ln)
   100  		tcpAddr := ln.Addr().(*net.TCPAddr)
   101  		srvAddr = net.JoinHostPort(tcpAddr.IP.String(), strconv.Itoa(tcpAddr.Port))
   102  		time.Sleep(time.Second * 2)
   103  	})
   104  	return srvAddr, err
   105  }
   106  
   107  // TestGRPCSuccess tests probe output on success.
   108  // 2 connections, 1 probe/sec/conn, stats exported every 5 sec
   109  // 	=> 5-10 results/interval. Test looks for minimum of 7 results.
   110  func TestGRPCSuccess(t *testing.T) {
   111  	addr, err := globalGRPCServer()
   112  	if err != nil {
   113  		t.Fatalf("Error initializing global config: %v", err)
   114  	}
   115  	cfg, err := probeCfg(addr, "", 1000, 2)
   116  	if err != nil {
   117  		t.Fatalf("Error unmarshalling config: %v", err)
   118  	}
   119  	l := &logger.Logger{}
   120  
   121  	iters := 5
   122  	statsExportInterval := time.Duration(iters) * time.Second
   123  
   124  	probeOpts := &options.Options{
   125  		Targets:             targets.StaticTargets(addr),
   126  		Timeout:             time.Second * 1,
   127  		Interval:            time.Second * 1,
   128  		ProbeConf:           cfg.GetGrpcProbe(),
   129  		Logger:              l,
   130  		StatsExportInterval: statsExportInterval,
   131  		LogMetrics:          func(em *metrics.EventMetrics) {},
   132  	}
   133  	p := &Probe{}
   134  	p.Init("grpc-success", probeOpts)
   135  	dataChan := make(chan *metrics.EventMetrics, 5)
   136  	ctx, cancel := context.WithCancel(context.Background())
   137  	var wg sync.WaitGroup
   138  	wg.Add(1)
   139  	go func() {
   140  		defer wg.Done()
   141  		p.Start(ctx, dataChan)
   142  	}()
   143  	time.Sleep(statsExportInterval * 2)
   144  	found := false
   145  	expectedLabels := map[string]string{
   146  		"ptype": "grpc",
   147  		"dst":   addr,
   148  		"probe": "grpc-success",
   149  	}
   150  
   151  	for i := 0; i < 2; i++ {
   152  		select {
   153  		case em := <-dataChan:
   154  			t.Logf("Probe results: %v", em.String())
   155  			total := em.Metric("total").(*metrics.Int)
   156  			success := em.Metric("success").(*metrics.Int)
   157  			expect := int64(iters) + 2
   158  			if total.Int64() < expect || success.Int64() < expect {
   159  				t.Errorf("Got total=%d success=%d, expecting at least %d for each", total.Int64(), success.Int64(), expect)
   160  			}
   161  			gotLabels := make(map[string]string)
   162  			for _, k := range em.LabelsKeys() {
   163  				gotLabels[k] = em.Label(k)
   164  			}
   165  			if !reflect.DeepEqual(gotLabels, expectedLabels) {
   166  				t.Errorf("Unexpected labels: got: %v, expected: %v", gotLabels, expectedLabels)
   167  			}
   168  			found = true
   169  		default:
   170  			time.Sleep(time.Second)
   171  		}
   172  	}
   173  	if !found {
   174  		t.Errorf("No probe results found")
   175  	}
   176  
   177  	cancel()
   178  	wg.Wait()
   179  }
   180  
   181  // TestConnectFailures attempts to connect to localhost:9 (discard port) and
   182  // checks that stats are exported once every connect timeout.
   183  // 2 connections, 0.5 connect attempt/sec/conn, stats exported every 6 sec
   184  //  => 3 - 6 connect errors/sec. Test looks for minimum of 4 attempts.
   185  func TestConnectFailures(t *testing.T) {
   186  	addr := "localhost:9"
   187  	cfg, err := probeCfg(addr, "", 1000, 2)
   188  	if err != nil {
   189  		t.Fatalf("Error unmarshalling config: %v", err)
   190  	}
   191  	l := &logger.Logger{}
   192  
   193  	iters := 6
   194  	statsExportInterval := time.Duration(iters) * time.Second
   195  
   196  	probeOpts := &options.Options{
   197  		Targets:             targets.StaticTargets(addr),
   198  		Timeout:             time.Second * 1,
   199  		Interval:            time.Second * 1,
   200  		ProbeConf:           cfg.GetGrpcProbe(),
   201  		Logger:              l,
   202  		StatsExportInterval: statsExportInterval,
   203  		LogMetrics:          func(em *metrics.EventMetrics) {},
   204  	}
   205  	p := &Probe{}
   206  	p.Init("grpc-connectfail", probeOpts)
   207  	dataChan := make(chan *metrics.EventMetrics, 5)
   208  	ctx, cancel := context.WithCancel(context.Background())
   209  	var wg sync.WaitGroup
   210  	wg.Add(1)
   211  	go func() {
   212  		defer wg.Done()
   213  		p.Start(ctx, dataChan)
   214  	}()
   215  	time.Sleep(statsExportInterval * 2)
   216  	found := false
   217  	for i := 0; i < 2; i++ {
   218  		select {
   219  		case em := <-dataChan:
   220  			t.Logf("Probe results: %v", em.String())
   221  			total := em.Metric("total").(*metrics.Int)
   222  			success := em.Metric("success").(*metrics.Int)
   223  			connectErrs := em.Metric("connecterrors").(*metrics.Int)
   224  			expect := int64(iters/2) + 1
   225  			if success.Int64() > 0 {
   226  				t.Errorf("Got %d probe successes, want all failures", success.Int64())
   227  			}
   228  			if total.Int64() < expect || connectErrs.Int64() < expect {
   229  				t.Errorf("Got total=%d connectErrs=%d, expecting at least %d for each", total.Int64(), connectErrs.Int64(), expect)
   230  			}
   231  			found = true
   232  		default:
   233  			time.Sleep(time.Second)
   234  		}
   235  	}
   236  	if !found {
   237  		t.Errorf("No probe results found")
   238  	}
   239  
   240  	cancel()
   241  	wg.Wait()
   242  }
   243  
   244  func TestProbeTimeouts(t *testing.T) {
   245  	addr, err := globalGRPCServer()
   246  	if err != nil {
   247  		t.Fatalf("Error initializing global config: %v", err)
   248  	}
   249  	cfg, err := probeCfg(addr, "", 1000, 1)
   250  	if err != nil {
   251  		t.Fatalf("Error unmarshalling config: %v", err)
   252  	}
   253  	l := &logger.Logger{}
   254  
   255  	iters := 5
   256  	statsExportInterval := time.Duration(iters) * time.Second
   257  
   258  	probeOpts := &options.Options{
   259  		Targets:             targets.StaticTargets(addr),
   260  		Timeout:             time.Millisecond * 100,
   261  		Interval:            time.Second * 1,
   262  		ProbeConf:           cfg.GetGrpcProbe(),
   263  		Logger:              l,
   264  		LatencyUnit:         time.Millisecond,
   265  		StatsExportInterval: statsExportInterval,
   266  		LogMetrics:          func(em *metrics.EventMetrics) {},
   267  	}
   268  	p := &Probe{}
   269  	p.Init("grpc-reqtimeout", probeOpts)
   270  	dataChan := make(chan *metrics.EventMetrics, 5)
   271  
   272  	ctx, cancel := context.WithCancel(context.Background())
   273  	var wg sync.WaitGroup
   274  	wg.Add(1)
   275  	go func() {
   276  		defer wg.Done()
   277  		p.Start(ctx, dataChan)
   278  	}()
   279  	ems, err := testutils.MetricsFromChannel(dataChan, 2, statsExportInterval*3)
   280  	if err != nil {
   281  		t.Fatalf("Error retrieving metrics: %v", err)
   282  	}
   283  	mm := testutils.MetricsMap(ems)
   284  	for target, vals := range mm["success"] {
   285  		for _, v := range vals {
   286  			success := v.Metric("success").(*metrics.Int)
   287  			if success.Int64() > 0 {
   288  				t.Errorf("Tgt %s unexpectedly succeeds, got=%d, want=0.", target, success.Int64())
   289  				break
   290  			}
   291  		}
   292  	}
   293  
   294  	found := false
   295  	for target, vals := range mm["total"] {
   296  		prevTotal := int64(0)
   297  		for _, v := range vals {
   298  			total := v.Metric("total").(*metrics.Int)
   299  			delta := total.Int64() - prevTotal
   300  			// Even a single probe in iter is treated as success.
   301  			if delta <= 0 {
   302  				t.Errorf("Tgt %s did not get enough probes, got=%d, want>=1", target, delta)
   303  				break
   304  			}
   305  			found = true
   306  		}
   307  	}
   308  	if !found {
   309  		t.Errorf("No probe results found")
   310  	}
   311  	cancel()
   312  	wg.Wait()
   313  }
   314  
   315  type testTargets struct {
   316  	r *resolver.Resolver
   317  
   318  	start        time.Time
   319  	startTargets []endpoint.Endpoint
   320  
   321  	switchDur   time.Duration
   322  	nextTargets []endpoint.Endpoint
   323  }
   324  
   325  func newTargets(startTargets, nextTargets []endpoint.Endpoint, switchDur time.Duration) targets.Targets {
   326  	return &testTargets{r: resolver.New(), startTargets: startTargets, nextTargets: nextTargets, start: time.Now(), switchDur: switchDur}
   327  }
   328  
   329  func (t *testTargets) ListEndpoints() []endpoint.Endpoint {
   330  	if time.Since(t.start) > t.switchDur {
   331  		return t.nextTargets
   332  	}
   333  	return t.startTargets
   334  }
   335  
   336  func (t *testTargets) Resolve(name string, ipVer int) (net.IP, error) {
   337  	return t.r.Resolve(name, ipVer)
   338  }
   339  
   340  func sumIntMetrics(inp []*metrics.EventMetrics, metricName string) int64 {
   341  	sum := metrics.NewInt(0)
   342  	for _, em := range inp {
   343  		sum.Add(em.Metric(metricName))
   344  	}
   345  	return sum.Int64()
   346  }
   347  
   348  func TestTargets(t *testing.T) {
   349  	addr, err := globalGRPCServer()
   350  	if err != nil {
   351  		t.Fatalf("Error initializing global config: %v", err)
   352  	}
   353  	cfg, err := probeCfg(addr, "", 1000, 2)
   354  	if err != nil {
   355  		t.Fatalf("Error unmarshalling config: %v", err)
   356  	}
   357  	l := &logger.Logger{}
   358  
   359  	goodTargets := targets.StaticTargets(addr).ListEndpoints()
   360  	badTargets := targets.StaticTargets("localhost:1,localhost:2").ListEndpoints()
   361  
   362  	// Target discovery changes from good to bad targets after 2 statsExports.
   363  	// And probe continues for 10 more stats exports.
   364  	statsExportInterval := 1 * time.Second
   365  	TargetsUpdateInterval = 2 * time.Second
   366  	probeRunTime := 12 * time.Second
   367  
   368  	probeOpts := &options.Options{
   369  		Targets:             newTargets(goodTargets, badTargets, TargetsUpdateInterval-time.Second),
   370  		Timeout:             time.Second,
   371  		Interval:            time.Second * 1,
   372  		ProbeConf:           cfg.GetGrpcProbe(),
   373  		Logger:              l,
   374  		LatencyUnit:         time.Millisecond,
   375  		StatsExportInterval: statsExportInterval,
   376  		LogMetrics:          func(em *metrics.EventMetrics) {},
   377  	}
   378  	p := &Probe{}
   379  	p.Init("grpc", probeOpts)
   380  	dataChan := make(chan *metrics.EventMetrics, 10)
   381  	ctx, cancel := context.WithCancel(context.Background())
   382  	var wg sync.WaitGroup
   383  	wg.Add(1)
   384  	go func() {
   385  		defer wg.Done()
   386  		p.Start(ctx, dataChan)
   387  	}()
   388  
   389  	ems, err := testutils.MetricsFromChannel(dataChan, 12, probeRunTime)
   390  	if err != nil {
   391  		t.Fatalf("Error retrieving metrics: %v", err)
   392  	}
   393  	mm := testutils.MetricsMap(ems)
   394  
   395  	connErrTargets := make(map[string]int64)
   396  	connErrIterCount := 0
   397  	for target, vals := range mm["connecterrors"] {
   398  		s := sumIntMetrics(vals, "connecterrors")
   399  		if s > 0 {
   400  			connErrTargets[target] = s
   401  		}
   402  		if len(vals) > connErrIterCount {
   403  			connErrIterCount = len(vals)
   404  		}
   405  	}
   406  
   407  	successTargets := make(map[string]int64)
   408  	successIterCount := 0
   409  	for target, vals := range mm["success"] {
   410  		s := sumIntMetrics(vals, "success")
   411  		if s > 0 {
   412  			successTargets[target] = s
   413  			if connErrTargets[target] > 0 {
   414  				t.Errorf("Target %s has both success and failures.", target)
   415  			}
   416  			if len(vals) > successIterCount {
   417  				successIterCount = len(vals)
   418  			}
   419  		}
   420  	}
   421  
   422  	if len(successTargets) == 0 {
   423  		t.Errorf("Got zero targets with success, want at least one.")
   424  	}
   425  	if len(connErrTargets) == 0 {
   426  		t.Errorf("Got zero targets with connection errors, want at least one.")
   427  	}
   428  	if successIterCount >= connErrIterCount {
   429  		t.Errorf("Got successIters(%d) >= connErrIters(%d), want '<'.", successIterCount, connErrIterCount)
   430  	}
   431  
   432  	cancel()
   433  	wg.Wait()
   434  }