github.com/google/cloudprober@v0.11.3/probes/udp/udp_test.go (about)

     1  // Copyright 2017-2021 The Cloudprober Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Workaround to skip UDP tests using a tag, until
    16  // https://github.com/google/cloudprober/issues/199 is fixed.
    17  //go:build !skip_udp_probe_test
    18  // +build !skip_udp_probe_test
    19  
    20  package udp
    21  
    22  import (
    23  	"context"
    24  	"net"
    25  	"os"
    26  	"sync"
    27  	"testing"
    28  	"time"
    29  
    30  	"github.com/golang/protobuf/proto"
    31  	"github.com/google/cloudprober/logger"
    32  	"github.com/google/cloudprober/metrics"
    33  	"github.com/google/cloudprober/probes/options"
    34  	configpb "github.com/google/cloudprober/probes/udp/proto"
    35  	"github.com/google/cloudprober/sysvars"
    36  	"github.com/google/cloudprober/targets"
    37  )
    38  
    39  type serverConnStats struct {
    40  	sync.Mutex
    41  	msgCt map[string]int
    42  }
    43  
    44  func startUDPServer(ctx context.Context, t *testing.T, drop bool, delay time.Duration) (int, *serverConnStats) {
    45  	conn, err := net.ListenUDP("udp", nil)
    46  	if err != nil {
    47  		t.Fatalf("Starting UDP server failed: %v", err)
    48  	}
    49  	t.Logf("Recv addr: %s", conn.LocalAddr().String())
    50  	// Simple loop to ECHO data.
    51  	scs := &serverConnStats{
    52  		msgCt: make(map[string]int),
    53  	}
    54  
    55  	go func() {
    56  		timeout := time.Millisecond * 100
    57  		maxLen := 1500
    58  		b := make([]byte, maxLen)
    59  		for {
    60  			select {
    61  			case <-ctx.Done():
    62  				conn.Close()
    63  				return
    64  			default:
    65  			}
    66  
    67  			conn.SetReadDeadline(time.Now().Add(timeout))
    68  			msgLen, addr, err := conn.ReadFromUDP(b)
    69  			if err != nil {
    70  				if !isClientTimeout(err) {
    71  					t.Logf("Error receiving message: %v", err)
    72  				}
    73  				continue
    74  			}
    75  			t.Logf("Message from %s, size: %d", addr.String(), msgLen)
    76  			scs.Lock()
    77  			scs.msgCt[addr.String()]++
    78  			scs.Unlock()
    79  			if drop {
    80  				continue
    81  			}
    82  			go func(b []byte, addr *net.UDPAddr) {
    83  				if delay != 0 {
    84  					time.Sleep(delay)
    85  				}
    86  				conn.SetWriteDeadline(time.Now().Add(timeout))
    87  				if _, err := conn.WriteToUDP(b, addr); err != nil {
    88  					t.Logf("Error sending message %s: %v", b, err)
    89  				}
    90  				t.Logf("Sent message to %s", addr.String())
    91  			}(append([]byte{}, b[:msgLen]...), addr)
    92  		}
    93  	}()
    94  
    95  	return conn.LocalAddr().(*net.UDPAddr).Port, scs
    96  }
    97  
    98  const numTxPorts = 2
    99  
   100  func runProbe(t *testing.T, interval, timeout time.Duration, probesToSend int, scs *serverConnStats, conf *configpb.ProbeConf) *Probe {
   101  	ctx, cancelCtx := context.WithCancel(context.Background())
   102  	var wg sync.WaitGroup
   103  
   104  	sysvars.Init(&logger.Logger{}, nil)
   105  	p := &Probe{}
   106  	ipVersion := 6
   107  	if _, ok := os.LookupEnv("TRAVIS"); ok {
   108  		ipVersion = 4
   109  	}
   110  
   111  	conf.NumTxPorts = proto.Int32(numTxPorts)
   112  	opts := &options.Options{
   113  		IPVersion:           ipVersion,
   114  		Targets:             targets.StaticTargets("localhost"),
   115  		Interval:            interval,
   116  		Timeout:             timeout,
   117  		ProbeConf:           conf,
   118  		StatsExportInterval: 10 * time.Second,
   119  	}
   120  	if err := p.Init("udp", opts); err != nil {
   121  		t.Fatalf("Error initializing UDP probe: %v", err)
   122  	}
   123  	p.targets = p.opts.Targets.ListEndpoints()
   124  	p.initProbeRunResults()
   125  
   126  	for _, conn := range p.connList {
   127  		wg.Add(1)
   128  		go func(c *net.UDPConn) {
   129  			defer wg.Done()
   130  			p.recvLoop(ctx, c)
   131  		}(conn)
   132  	}
   133  
   134  	time.Sleep(time.Second)
   135  
   136  	wg.Add(1)
   137  	go func() {
   138  		defer wg.Done()
   139  
   140  		flushTicker := time.NewTicker(p.flushIntv)
   141  		for {
   142  			select {
   143  			case <-ctx.Done():
   144  				flushTicker.Stop()
   145  				return
   146  			case <-flushTicker.C:
   147  				p.processPackets()
   148  			}
   149  		}
   150  	}()
   151  
   152  	time.Sleep(interval)
   153  	for i := 0; i < probesToSend; i++ {
   154  		p.runProbe()
   155  		time.Sleep(interval)
   156  	}
   157  
   158  	// Sleep for 2*statsExportIntv, to make sure that stats are updated and
   159  	// exported.
   160  	time.Sleep(2 * interval)
   161  	time.Sleep(2 * timeout)
   162  
   163  	scs.Lock()
   164  	defer scs.Unlock()
   165  	if len(scs.msgCt) != len(p.connList) {
   166  		t.Errorf("Got packets over %d connections, required %d", len(scs.msgCt), len(p.connList))
   167  	}
   168  	t.Logf("Echo server stats: %v", scs.msgCt)
   169  
   170  	cancelCtx()
   171  	wg.Wait()
   172  
   173  	return p
   174  }
   175  
   176  func TestSuccessMultipleCasesResultPerPort(t *testing.T) {
   177  	cases := []struct {
   178  		name        string
   179  		interval    time.Duration
   180  		timeout     time.Duration
   181  		delay       time.Duration
   182  		probeCount  int
   183  		useAllPorts bool
   184  		pktCount    int64
   185  	}{
   186  		// 10 probes, probing each target from 2 ports, at the interval of 200ms, with 100ms timeout and 10ms delay on server.
   187  		{"success_normal", 200, 100, 10, 10, true, 10},
   188  		// 20 probes, probing each target from 2 ports, at the interval of 100ms, with 1000ms timeout and 50ms delay on server.
   189  		{"success_timeout_larger_than_interval_1", 100, 1000, 50, 20, true, 20},
   190  		// 20 probes, probing each target from 2 ports, at the interval of 100ms, with 1000ms timeout and 200ms delay on server.
   191  		{"success_timeout_larger_than_interval_2", 100, 1000, 200, 20, true, 20},
   192  		// 10 probes, probing each target just once, at the interval of 200ms, with 100ms timeout and 10ms delay on server.
   193  		{"single_port", 200, 100, 10, 10, false, 5},
   194  	}
   195  
   196  	for _, c := range cases {
   197  		ctx, cancelServerCtx := context.WithCancel(context.Background())
   198  		port, scs := startUDPServer(ctx, t, false, c.delay*time.Millisecond)
   199  		t.Logf("Case(%s): started server on port %d with delay %v", c.name, port, c.delay)
   200  
   201  		conf := &configpb.ProbeConf{
   202  			UseAllTxPortsPerProbe: proto.Bool(c.useAllPorts),
   203  			Port:                  proto.Int32(int32(port)),
   204  			ExportMetricsByPort:   proto.Bool(true),
   205  		}
   206  
   207  		p := runProbe(t, c.interval*time.Millisecond, c.timeout*time.Millisecond, c.probeCount, scs, conf)
   208  		cancelServerCtx()
   209  
   210  		if len(p.connList) != numTxPorts {
   211  			t.Errorf("Case(%s): len(p.connList)=%d, want %d", c.name, len(p.connList), numTxPorts)
   212  		}
   213  		for _, port := range p.srcPortList {
   214  			res := p.res[flow{port, "localhost"}]
   215  			if res.total != c.pktCount {
   216  				t.Errorf("Case(%s): p.res[_].total=%d, want %d", c.name, res.total, c.pktCount)
   217  			}
   218  			if res.success != c.pktCount {
   219  				t.Errorf("Case(%s): p.res[_].success=%d want %d", c.name, res.success, c.pktCount)
   220  			}
   221  			if res.delayed != 0 {
   222  				t.Errorf("Case(%s): p.res[_].delayed=%d, want 0", c.name, res.delayed)
   223  			}
   224  		}
   225  	}
   226  }
   227  
   228  func TestSuccessMultipleCasesDefaultResult(t *testing.T) {
   229  	cases := []struct {
   230  		name        string
   231  		interval    time.Duration
   232  		timeout     time.Duration
   233  		delay       time.Duration
   234  		probeCount  int
   235  		useAllPorts bool
   236  		pktCount    int64
   237  	}{
   238  		// 10 probes, probing each target from 2 ports, at the interval of 200ms, with 100ms timeout and 10ms delay on server.
   239  		{"success_normal", 200, 100, 10, 10, true, 20},
   240  		// 20 probes, probing each target from 2 ports, at the interval of 100ms, with 1000ms timeout and 50ms delay on server.
   241  		{"success_timeout_larger_than_interval_1", 100, 1000, 50, 20, true, 40},
   242  		// 20 probes, probing each target from 2 ports, at the interval of 100ms, with 1000ms timeout and 200ms delay on server.
   243  		{"success_timeout_larger_than_interval_2", 100, 1000, 200, 20, true, 40},
   244  		// 10 probes, probing each target just once, at the interval of 200ms, with 100ms timeout and 10ms delay on server.
   245  		{"single_port", 200, 100, 10, 10, false, 10},
   246  	}
   247  
   248  	for _, c := range cases {
   249  		ctx, cancelServerCtx := context.WithCancel(context.Background())
   250  		port, scs := startUDPServer(ctx, t, false, c.delay*time.Millisecond)
   251  		t.Logf("Case(%s): started server on port %d with delay %v", c.name, port, c.delay)
   252  		conf := &configpb.ProbeConf{
   253  			UseAllTxPortsPerProbe: proto.Bool(c.useAllPorts),
   254  			Port:                  proto.Int32(int32(port)),
   255  			ExportMetricsByPort:   proto.Bool(false),
   256  		}
   257  
   258  		p := runProbe(t, c.interval*time.Millisecond, c.timeout*time.Millisecond, c.probeCount, scs, conf)
   259  		cancelServerCtx()
   260  
   261  		if len(p.connList) != numTxPorts {
   262  			t.Errorf("Case(%s): len(p.connList)=%d, want %d", c.name, len(p.connList), numTxPorts)
   263  		}
   264  		res := p.res[flow{"", "localhost"}]
   265  		if res.total != c.pktCount {
   266  			t.Errorf("Case(%s): p.res[_].total=%d, want %d", c.name, res.total, c.pktCount)
   267  		}
   268  		if res.success != c.pktCount {
   269  			t.Errorf("Case(%s): p.res[_].success=%d want %d", c.name, res.success, c.pktCount)
   270  		}
   271  		if res.delayed != 0 {
   272  			t.Errorf("Case(%s): p.res[_].delayed=%d, want 0", c.name, res.delayed)
   273  		}
   274  	}
   275  }
   276  
   277  func extractMetric(em *metrics.EventMetrics, key string) int64 {
   278  	return em.Metric(key).(*metrics.Int).Int64()
   279  }
   280  
   281  func TestExport(t *testing.T) {
   282  	res := probeResult{
   283  		total:   3,
   284  		success: 2,
   285  		delayed: 1,
   286  		latency: metrics.NewFloat(100.),
   287  	}
   288  	conf := configpb.ProbeConf{
   289  		ExportMetricsByPort: proto.Bool(true),
   290  		Port:                proto.Int32(1234),
   291  	}
   292  	m := res.eventMetrics("probe", &options.Options{}, flow{"port", "target"}, &conf)
   293  	if r := extractMetric(m, "total-per-port"); r != 3 {
   294  		t.Errorf("extractMetric(m,\"total-per-port\")=%d, want 3", r)
   295  	}
   296  	if r := extractMetric(m, "success-per-port"); r != 2 {
   297  		t.Errorf("extractMetric(m,\"success-per-port\")=%d, want 2", r)
   298  	}
   299  	if got, want := m.Label("src_port"), "port"; got != want {
   300  		t.Errorf("m.Label(\"src_port\")=%q, want %q", got, want)
   301  	}
   302  	if got, want := m.Label("dst_port"), "1234"; got != want {
   303  		t.Errorf("m.Label(\"dst_port\")=%q, want %q", got, want)
   304  	}
   305  	conf = configpb.ProbeConf{
   306  		ExportMetricsByPort: proto.Bool(false),
   307  		Port:                proto.Int32(1234),
   308  	}
   309  	m = res.eventMetrics("probe", &options.Options{}, flow{"port", "target"}, &conf)
   310  	if r := extractMetric(m, "total"); r != 3 {
   311  		t.Errorf("extractMetric(m,\"total\")=%d, want 3", r)
   312  	}
   313  	if r := extractMetric(m, "success"); r != 2 {
   314  		t.Errorf("extractMetric(m,\"success\")=%d, want 2", r)
   315  	}
   316  	if got, want := m.Label("src_port"), ""; got != want {
   317  		t.Errorf("m.Label(\"src_port\")=%q, want %q", got, want)
   318  	}
   319  	if got, want := m.Label("dst_port"), ""; got != want {
   320  		t.Errorf("m.Label(\"dst_port\")=%q, want %q", got, want)
   321  	}
   322  }
   323  
   324  func TestLossAndDelayed(t *testing.T) {
   325  	var pktCount int64 = 10
   326  	cases := []struct {
   327  		name     string
   328  		drop     bool
   329  		interval time.Duration
   330  		timeout  time.Duration
   331  		delay    time.Duration
   332  		delayCt  int64
   333  	}{
   334  		// 10 packets, at the interval of 100ms, with 50ms timeout and drop on server.
   335  		{"loss", true, 100, 50, 0, 0},
   336  		// 10 packets, at the interval of 100ms, with 50ms timeout and 67ms delay on server.
   337  		{"delayed_1", false, 100, 50, 67, pktCount},
   338  		// 10 packets, at the interval of 100ms, with 250ms timeout and 300ms delay on server.
   339  		{"delayed_2", false, 100, 250, 300, pktCount},
   340  	}
   341  
   342  	for _, c := range cases {
   343  		ctx, cancelServerCtx := context.WithCancel(context.Background())
   344  		port, scs := startUDPServer(ctx, t, c.drop, c.delay*time.Millisecond)
   345  
   346  		t.Logf("Case(%s): started server on port %d with loss %v delay %v", c.name, port, c.drop, c.delay)
   347  
   348  		conf := &configpb.ProbeConf{
   349  			UseAllTxPortsPerProbe: proto.Bool(true),
   350  			Port:                  proto.Int32(int32(port)),
   351  			ExportMetricsByPort:   proto.Bool(true),
   352  		}
   353  
   354  		p := runProbe(t, c.interval*time.Millisecond, c.timeout*time.Millisecond, int(pktCount), scs, conf)
   355  		cancelServerCtx()
   356  
   357  		if len(p.connList) != numTxPorts {
   358  			t.Errorf("Case(%s): len(p.connList)=%d, want %d", c.name, len(p.connList), numTxPorts)
   359  		}
   360  
   361  		for _, port := range p.srcPortList {
   362  			res := p.res[flow{port, "localhost"}]
   363  			if res.total != pktCount {
   364  				t.Errorf("Case(%s): p.res[_].total=%d, want %d", c.name, res.total, pktCount)
   365  			}
   366  			if res.success != 0 {
   367  				t.Errorf("Case(%s): p.res[_].success=%d want 0", c.name, res.success)
   368  			}
   369  			if res.delayed != c.delayCt {
   370  				t.Errorf("Case(%s): p.res[_].delayed=%d, want %d", c.name, res.delayed, c.delayCt)
   371  			}
   372  		}
   373  	}
   374  }