github.com/google/cloudprober@v0.11.3/probes/grpc/grpc_test.go (about) 1 package grpc 2 3 import ( 4 "context" 5 "fmt" 6 "net" 7 "reflect" 8 "strconv" 9 "sync" 10 "testing" 11 "time" 12 13 "github.com/golang/protobuf/proto" 14 "github.com/google/cloudprober/logger" 15 "github.com/google/cloudprober/metrics" 16 "github.com/google/cloudprober/metrics/testutils" 17 "github.com/google/cloudprober/probes/options" 18 probepb "github.com/google/cloudprober/probes/proto" 19 grpcpb "github.com/google/cloudprober/servers/grpc/proto" 20 spb "github.com/google/cloudprober/servers/grpc/proto" 21 "github.com/google/cloudprober/targets" 22 "github.com/google/cloudprober/targets/endpoint" 23 "github.com/google/cloudprober/targets/resolver" 24 "google.golang.org/grpc" 25 ) 26 27 var once sync.Once 28 var srvAddr string 29 var baseProbeConf = ` 30 name: "grpc" 31 type: GRPC 32 targets { 33 host_names: "%s" 34 } 35 interval_msec: 1000 36 timeout_msec: %d 37 grpc_probe { 38 %s 39 num_conns: %d 40 connect_timeout_msec: 2000 41 } 42 ` 43 44 func probeCfg(tgts, cred string, timeout, numConns int) (*probepb.ProbeDef, error) { 45 conf := fmt.Sprintf(baseProbeConf, tgts, timeout, cred, numConns) 46 cfg := &probepb.ProbeDef{} 47 err := proto.UnmarshalText(conf, cfg) 48 return cfg, err 49 } 50 51 type Server struct { 52 delay time.Duration 53 msg []byte 54 } 55 56 // Echo reflects back the incoming message. 57 // TODO: return error if EchoMessage is greater than maxMsgSize. 58 func (s *Server) Echo(ctx context.Context, req *spb.EchoMessage) (*spb.EchoMessage, error) { 59 if s.delay > 0 { 60 time.Sleep(s.delay) 61 } 62 return req, nil 63 } 64 65 // BlobRead returns a blob of data. 66 func (s *Server) BlobRead(ctx context.Context, req *spb.BlobReadRequest) (*spb.BlobReadResponse, error) { 67 return &spb.BlobReadResponse{ 68 Blob: s.msg[0:req.GetSize()], 69 }, nil 70 } 71 72 // ServerStatus returns the current server status. 73 func (s *Server) ServerStatus(ctx context.Context, req *spb.StatusRequest) (*spb.StatusResponse, error) { 74 return &spb.StatusResponse{ 75 UptimeUs: proto.Int64(42), 76 }, nil 77 } 78 79 // BlobWrite returns the size of blob in the WriteRequest. It does not operate 80 // on the blob. 81 func (s *Server) BlobWrite(ctx context.Context, req *spb.BlobWriteRequest) (*spb.BlobWriteResponse, error) { 82 return &spb.BlobWriteResponse{ 83 Size: proto.Int32(int32(len(req.Blob))), 84 }, nil 85 } 86 87 // globalGRPCServer sets up runconfig and returns a gRPC server. 88 func globalGRPCServer() (string, error) { 89 var err error 90 once.Do(func() { 91 var ln net.Listener 92 ln, err = net.Listen("tcp", "localhost:0") 93 if err != nil { 94 return 95 } 96 grpcSrv := grpc.NewServer() 97 srv := &Server{delay: time.Second / 2, msg: make([]byte, 1024)} 98 grpcpb.RegisterProberServer(grpcSrv, srv) 99 go grpcSrv.Serve(ln) 100 tcpAddr := ln.Addr().(*net.TCPAddr) 101 srvAddr = net.JoinHostPort(tcpAddr.IP.String(), strconv.Itoa(tcpAddr.Port)) 102 time.Sleep(time.Second * 2) 103 }) 104 return srvAddr, err 105 } 106 107 // TestGRPCSuccess tests probe output on success. 108 // 2 connections, 1 probe/sec/conn, stats exported every 5 sec 109 // => 5-10 results/interval. Test looks for minimum of 7 results. 110 func TestGRPCSuccess(t *testing.T) { 111 addr, err := globalGRPCServer() 112 if err != nil { 113 t.Fatalf("Error initializing global config: %v", err) 114 } 115 cfg, err := probeCfg(addr, "", 1000, 2) 116 if err != nil { 117 t.Fatalf("Error unmarshalling config: %v", err) 118 } 119 l := &logger.Logger{} 120 121 iters := 5 122 statsExportInterval := time.Duration(iters) * time.Second 123 124 probeOpts := &options.Options{ 125 Targets: targets.StaticTargets(addr), 126 Timeout: time.Second * 1, 127 Interval: time.Second * 1, 128 ProbeConf: cfg.GetGrpcProbe(), 129 Logger: l, 130 StatsExportInterval: statsExportInterval, 131 LogMetrics: func(em *metrics.EventMetrics) {}, 132 } 133 p := &Probe{} 134 p.Init("grpc-success", probeOpts) 135 dataChan := make(chan *metrics.EventMetrics, 5) 136 ctx, cancel := context.WithCancel(context.Background()) 137 var wg sync.WaitGroup 138 wg.Add(1) 139 go func() { 140 defer wg.Done() 141 p.Start(ctx, dataChan) 142 }() 143 time.Sleep(statsExportInterval * 2) 144 found := false 145 expectedLabels := map[string]string{ 146 "ptype": "grpc", 147 "dst": addr, 148 "probe": "grpc-success", 149 } 150 151 for i := 0; i < 2; i++ { 152 select { 153 case em := <-dataChan: 154 t.Logf("Probe results: %v", em.String()) 155 total := em.Metric("total").(*metrics.Int) 156 success := em.Metric("success").(*metrics.Int) 157 expect := int64(iters) + 2 158 if total.Int64() < expect || success.Int64() < expect { 159 t.Errorf("Got total=%d success=%d, expecting at least %d for each", total.Int64(), success.Int64(), expect) 160 } 161 gotLabels := make(map[string]string) 162 for _, k := range em.LabelsKeys() { 163 gotLabels[k] = em.Label(k) 164 } 165 if !reflect.DeepEqual(gotLabels, expectedLabels) { 166 t.Errorf("Unexpected labels: got: %v, expected: %v", gotLabels, expectedLabels) 167 } 168 found = true 169 default: 170 time.Sleep(time.Second) 171 } 172 } 173 if !found { 174 t.Errorf("No probe results found") 175 } 176 177 cancel() 178 wg.Wait() 179 } 180 181 // TestConnectFailures attempts to connect to localhost:9 (discard port) and 182 // checks that stats are exported once every connect timeout. 183 // 2 connections, 0.5 connect attempt/sec/conn, stats exported every 6 sec 184 // => 3 - 6 connect errors/sec. Test looks for minimum of 4 attempts. 185 func TestConnectFailures(t *testing.T) { 186 addr := "localhost:9" 187 cfg, err := probeCfg(addr, "", 1000, 2) 188 if err != nil { 189 t.Fatalf("Error unmarshalling config: %v", err) 190 } 191 l := &logger.Logger{} 192 193 iters := 6 194 statsExportInterval := time.Duration(iters) * time.Second 195 196 probeOpts := &options.Options{ 197 Targets: targets.StaticTargets(addr), 198 Timeout: time.Second * 1, 199 Interval: time.Second * 1, 200 ProbeConf: cfg.GetGrpcProbe(), 201 Logger: l, 202 StatsExportInterval: statsExportInterval, 203 LogMetrics: func(em *metrics.EventMetrics) {}, 204 } 205 p := &Probe{} 206 p.Init("grpc-connectfail", probeOpts) 207 dataChan := make(chan *metrics.EventMetrics, 5) 208 ctx, cancel := context.WithCancel(context.Background()) 209 var wg sync.WaitGroup 210 wg.Add(1) 211 go func() { 212 defer wg.Done() 213 p.Start(ctx, dataChan) 214 }() 215 time.Sleep(statsExportInterval * 2) 216 found := false 217 for i := 0; i < 2; i++ { 218 select { 219 case em := <-dataChan: 220 t.Logf("Probe results: %v", em.String()) 221 total := em.Metric("total").(*metrics.Int) 222 success := em.Metric("success").(*metrics.Int) 223 connectErrs := em.Metric("connecterrors").(*metrics.Int) 224 expect := int64(iters/2) + 1 225 if success.Int64() > 0 { 226 t.Errorf("Got %d probe successes, want all failures", success.Int64()) 227 } 228 if total.Int64() < expect || connectErrs.Int64() < expect { 229 t.Errorf("Got total=%d connectErrs=%d, expecting at least %d for each", total.Int64(), connectErrs.Int64(), expect) 230 } 231 found = true 232 default: 233 time.Sleep(time.Second) 234 } 235 } 236 if !found { 237 t.Errorf("No probe results found") 238 } 239 240 cancel() 241 wg.Wait() 242 } 243 244 func TestProbeTimeouts(t *testing.T) { 245 addr, err := globalGRPCServer() 246 if err != nil { 247 t.Fatalf("Error initializing global config: %v", err) 248 } 249 cfg, err := probeCfg(addr, "", 1000, 1) 250 if err != nil { 251 t.Fatalf("Error unmarshalling config: %v", err) 252 } 253 l := &logger.Logger{} 254 255 iters := 5 256 statsExportInterval := time.Duration(iters) * time.Second 257 258 probeOpts := &options.Options{ 259 Targets: targets.StaticTargets(addr), 260 Timeout: time.Millisecond * 100, 261 Interval: time.Second * 1, 262 ProbeConf: cfg.GetGrpcProbe(), 263 Logger: l, 264 LatencyUnit: time.Millisecond, 265 StatsExportInterval: statsExportInterval, 266 LogMetrics: func(em *metrics.EventMetrics) {}, 267 } 268 p := &Probe{} 269 p.Init("grpc-reqtimeout", probeOpts) 270 dataChan := make(chan *metrics.EventMetrics, 5) 271 272 ctx, cancel := context.WithCancel(context.Background()) 273 var wg sync.WaitGroup 274 wg.Add(1) 275 go func() { 276 defer wg.Done() 277 p.Start(ctx, dataChan) 278 }() 279 ems, err := testutils.MetricsFromChannel(dataChan, 2, statsExportInterval*3) 280 if err != nil { 281 t.Fatalf("Error retrieving metrics: %v", err) 282 } 283 mm := testutils.MetricsMap(ems) 284 for target, vals := range mm["success"] { 285 for _, v := range vals { 286 success := v.Metric("success").(*metrics.Int) 287 if success.Int64() > 0 { 288 t.Errorf("Tgt %s unexpectedly succeeds, got=%d, want=0.", target, success.Int64()) 289 break 290 } 291 } 292 } 293 294 found := false 295 for target, vals := range mm["total"] { 296 prevTotal := int64(0) 297 for _, v := range vals { 298 total := v.Metric("total").(*metrics.Int) 299 delta := total.Int64() - prevTotal 300 // Even a single probe in iter is treated as success. 301 if delta <= 0 { 302 t.Errorf("Tgt %s did not get enough probes, got=%d, want>=1", target, delta) 303 break 304 } 305 found = true 306 } 307 } 308 if !found { 309 t.Errorf("No probe results found") 310 } 311 cancel() 312 wg.Wait() 313 } 314 315 type testTargets struct { 316 r *resolver.Resolver 317 318 start time.Time 319 startTargets []endpoint.Endpoint 320 321 switchDur time.Duration 322 nextTargets []endpoint.Endpoint 323 } 324 325 func newTargets(startTargets, nextTargets []endpoint.Endpoint, switchDur time.Duration) targets.Targets { 326 return &testTargets{r: resolver.New(), startTargets: startTargets, nextTargets: nextTargets, start: time.Now(), switchDur: switchDur} 327 } 328 329 func (t *testTargets) ListEndpoints() []endpoint.Endpoint { 330 if time.Since(t.start) > t.switchDur { 331 return t.nextTargets 332 } 333 return t.startTargets 334 } 335 336 func (t *testTargets) Resolve(name string, ipVer int) (net.IP, error) { 337 return t.r.Resolve(name, ipVer) 338 } 339 340 func sumIntMetrics(inp []*metrics.EventMetrics, metricName string) int64 { 341 sum := metrics.NewInt(0) 342 for _, em := range inp { 343 sum.Add(em.Metric(metricName)) 344 } 345 return sum.Int64() 346 } 347 348 func TestTargets(t *testing.T) { 349 addr, err := globalGRPCServer() 350 if err != nil { 351 t.Fatalf("Error initializing global config: %v", err) 352 } 353 cfg, err := probeCfg(addr, "", 1000, 2) 354 if err != nil { 355 t.Fatalf("Error unmarshalling config: %v", err) 356 } 357 l := &logger.Logger{} 358 359 goodTargets := targets.StaticTargets(addr).ListEndpoints() 360 badTargets := targets.StaticTargets("localhost:1,localhost:2").ListEndpoints() 361 362 // Target discovery changes from good to bad targets after 2 statsExports. 363 // And probe continues for 10 more stats exports. 364 statsExportInterval := 1 * time.Second 365 TargetsUpdateInterval = 2 * time.Second 366 probeRunTime := 12 * time.Second 367 368 probeOpts := &options.Options{ 369 Targets: newTargets(goodTargets, badTargets, TargetsUpdateInterval-time.Second), 370 Timeout: time.Second, 371 Interval: time.Second * 1, 372 ProbeConf: cfg.GetGrpcProbe(), 373 Logger: l, 374 LatencyUnit: time.Millisecond, 375 StatsExportInterval: statsExportInterval, 376 LogMetrics: func(em *metrics.EventMetrics) {}, 377 } 378 p := &Probe{} 379 p.Init("grpc", probeOpts) 380 dataChan := make(chan *metrics.EventMetrics, 10) 381 ctx, cancel := context.WithCancel(context.Background()) 382 var wg sync.WaitGroup 383 wg.Add(1) 384 go func() { 385 defer wg.Done() 386 p.Start(ctx, dataChan) 387 }() 388 389 ems, err := testutils.MetricsFromChannel(dataChan, 12, probeRunTime) 390 if err != nil { 391 t.Fatalf("Error retrieving metrics: %v", err) 392 } 393 mm := testutils.MetricsMap(ems) 394 395 connErrTargets := make(map[string]int64) 396 connErrIterCount := 0 397 for target, vals := range mm["connecterrors"] { 398 s := sumIntMetrics(vals, "connecterrors") 399 if s > 0 { 400 connErrTargets[target] = s 401 } 402 if len(vals) > connErrIterCount { 403 connErrIterCount = len(vals) 404 } 405 } 406 407 successTargets := make(map[string]int64) 408 successIterCount := 0 409 for target, vals := range mm["success"] { 410 s := sumIntMetrics(vals, "success") 411 if s > 0 { 412 successTargets[target] = s 413 if connErrTargets[target] > 0 { 414 t.Errorf("Target %s has both success and failures.", target) 415 } 416 if len(vals) > successIterCount { 417 successIterCount = len(vals) 418 } 419 } 420 } 421 422 if len(successTargets) == 0 { 423 t.Errorf("Got zero targets with success, want at least one.") 424 } 425 if len(connErrTargets) == 0 { 426 t.Errorf("Got zero targets with connection errors, want at least one.") 427 } 428 if successIterCount >= connErrIterCount { 429 t.Errorf("Got successIters(%d) >= connErrIters(%d), want '<'.", successIterCount, connErrIterCount) 430 } 431 432 cancel() 433 wg.Wait() 434 }