github.com/m-lab/tcp-info@v1.9.0/saver/saver_test.go (about)

     1  package saver_test
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	"io/ioutil"
     8  	"log"
     9  	"math"
    10  	"os"
    11  	"path/filepath"
    12  	"runtime"
    13  	"strings"
    14  	"testing"
    15  	"time"
    16  
    17  	"github.com/m-lab/go/anonymize"
    18  
    19  	"github.com/m-lab/tcp-info/eventsocket"
    20  
    21  	"github.com/m-lab/go/rtx"
    22  	"github.com/m-lab/tcp-info/inetdiag"
    23  	"github.com/m-lab/tcp-info/metrics"
    24  	"github.com/m-lab/tcp-info/netlink"
    25  	"github.com/m-lab/tcp-info/saver"
    26  	"github.com/m-lab/tcp-info/zstd"
    27  
    28  	"github.com/prometheus/client_golang/prometheus"
    29  	dto "github.com/prometheus/client_model/go"
    30  )
    31  
    32  // TODO Tests:
    33  //   File closing.
    34  //   Marshaller selection.
    35  //   Rotation  (use 1 second rotation time)
    36  
    37  func init() {
    38  	// Always prepend the filename and line number.
    39  	log.SetFlags(log.LstdFlags | log.Lshortfile)
    40  }
    41  
    42  func dump(t *testing.T, mp *netlink.ArchivalRecord) {
    43  	for i := range mp.Attributes {
    44  		a := mp.Attributes[i]
    45  		if a != nil {
    46  			t.Logf("%d %d %+v\n", i, len(a), a)
    47  		}
    48  	}
    49  }
    50  
    51  type TestMsg struct {
    52  	netlink.NetlinkMessage
    53  }
    54  
    55  func (msg *TestMsg) copy() *TestMsg {
    56  	out := TestMsg{}
    57  	out.Header = msg.Header
    58  	out.Data = make([]byte, len(msg.Data))
    59  	copy(out.Data, msg.Data)
    60  	return &out
    61  }
    62  
    63  func (msg *TestMsg) setCookie(cookie uint64) *TestMsg {
    64  	raw, _ := inetdiag.SplitInetDiagMsg(msg.Data)
    65  	if raw == nil {
    66  		panic("setCookie failed")
    67  	}
    68  	idm, err := raw.Parse()
    69  	if err != nil {
    70  		panic("setCookie failed")
    71  	}
    72  	for i := 0; i < 8; i++ {
    73  		idm.ID.IDiagCookie[i] = byte(cookie & 0x0FF)
    74  		cookie >>= 8
    75  	}
    76  
    77  	return msg
    78  }
    79  func (msg *TestMsg) setDPort(dport uint16) *TestMsg {
    80  	raw, _ := inetdiag.SplitInetDiagMsg(msg.Data)
    81  	if raw == nil {
    82  		panic("setCookie failed")
    83  	}
    84  	idm, err := raw.Parse()
    85  	if err != nil {
    86  		panic("setCookie failed")
    87  	}
    88  	for i := 0; i < 2; i++ {
    89  		idm.ID.IDiagDPort[i] = byte(dport & 0x0FF)
    90  		dport >>= 8
    91  	}
    92  
    93  	return msg
    94  }
    95  
    96  func (msg *TestMsg) mustAR() *netlink.ArchivalRecord {
    97  	ar, err := netlink.MakeArchivalRecord(&msg.NetlinkMessage, nil)
    98  	if err != nil {
    99  		panic(err)
   100  	}
   101  	if ar == nil {
   102  		panic("nil ar - probably a local connection")
   103  	}
   104  
   105  	if len(ar.Attributes) <= inetdiag.INET_DIAG_INFO {
   106  		panic("No INET_DIAG_INFO message")
   107  	}
   108  
   109  	return ar
   110  }
   111  
   112  func (msg *TestMsg) setByte(offset int, value byte) *TestMsg {
   113  	ar := msg.mustAR()
   114  	ar.Attributes[inetdiag.INET_DIAG_INFO][offset] = value
   115  
   116  	return msg
   117  }
   118  
   119  func msg(t *testing.T, cookie uint64, dport uint16) *TestMsg {
   120  	// NOTE: If the INET_DIAG_INFO message gets larger, this may cause unit tests to fail, because
   121  	// the TestMsg assumes the message decoding is done in place.
   122  	var json1 = `{"Header":{"Len":420,"Type":20,"Flags":2,"Seq":1,"Pid":235855},"Data":"CgECAIaYE6cmIAAAEAMEFkrF0ry7OloFJgf4sEAMDAYAAAAAAAAAgQAAAABI6AcBAAAAAJgmAAAAAAAAAAAAAAAAAACsINMLBQAIAAAAAAAFAAUAIAAAAAUABgAgAAAAFAABAAAAAAAAAAAAAAAAAAAAAAAoAAcAAAAAAICiBQAAAAAAALQAAAAAAAAAAAAAAAAAAAAAAAAAAAAA5AACAAEAAAAAB3gBYFsDAECcAAB2BQAAGAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAA2BEAAAAAAACEEQAAyBEAANwFAABAgQAAL0gAACEAAAAHAAAACgAAAJQFAAADAAAAAAAAAIBwAAAAAAAAQdoNAAAAAAD///////////4zAAAAAAAADhAAAAAAAADgAAAA4QAAAAAAAADYRgAAJgAAAC8AAACi4gYAAAAAAGArCwAAAAAAAAAAAAAAAAAAAAAAAAAAADAAAAAAAAAA/TMAAAAAAAAAAAAAAAAAAAAAAAAAAAAACgAEAGN1YmljAAAACAARAAAAAAA="}`
   123  	nm := netlink.NetlinkMessage{}
   124  	err := json.Unmarshal([]byte(json1), &nm)
   125  	if err != nil {
   126  		t.Log(err)
   127  		return nil
   128  	}
   129  
   130  	msg := &TestMsg{nm}
   131  
   132  	msg = msg.setCookie(cookie).setDPort(dport)
   133  	return msg
   134  }
   135  
   136  func (msg *TestMsg) setBytesSent(value uint64) *TestMsg {
   137  	ar := msg.mustAR()
   138  	ar.SetBytesSent(value)
   139  	return msg
   140  }
   141  
   142  func (msg *TestMsg) setBytesReceived(value uint64) *TestMsg {
   143  	ar := msg.mustAR()
   144  	ar.SetBytesReceived(value)
   145  	return msg
   146  }
   147  
   148  func verifySizeBetween(t *testing.T, minSize, maxSize int64, pattern string) {
   149  	names, err := filepath.Glob(pattern)
   150  	rtx.Must(err, "Could not Glob pattern %s", pattern)
   151  	if len(names) != 1 {
   152  		t.Fatal("The glob", pattern, "should return exactly one file, not", len(names))
   153  	}
   154  	filename := names[0]
   155  	info, err := os.Stat(filename)
   156  	if err != nil {
   157  		t.Fatal(err)
   158  	}
   159  	if info.Size() < minSize || info.Size() > maxSize {
   160  		_, file, line, _ := runtime.Caller(1)
   161  		t.Error("Size of", filename, " (", info.Size(), ") is out of bounds.  We expect", minSize, "<=", info.Size(), "<=", maxSize, "at", file, line, ".")
   162  	}
   163  }
   164  
   165  func histContains(m prometheus.Metric, s string) bool {
   166  	var mm dto.Metric
   167  	m.Write(&mm)
   168  	h := mm.GetHistogram()
   169  	if h == nil {
   170  		log.Println(h)
   171  		return false
   172  	}
   173  	if strings.Contains(h.String(), s) {
   174  		return true
   175  	}
   176  	log.Println("Expected:", s, "Got:", h)
   177  	return false
   178  }
   179  
   180  func checkCounter(t *testing.T, c chan prometheus.Metric, expected float64) {
   181  	m := <-c
   182  	v := counterValue(m)
   183  	if v != expected {
   184  		t.Error("For", m.Desc(), "expected:", expected, "got:", v)
   185  	}
   186  }
   187  
   188  func counterValue(m prometheus.Metric) float64 {
   189  	var mm dto.Metric
   190  	m.Write(&mm)
   191  	ctr := mm.GetCounter()
   192  	if ctr == nil {
   193  		log.Println(mm.GetUntyped())
   194  		return math.Inf(-1)
   195  	}
   196  
   197  	return *ctr.Value
   198  }
   199  
   200  type countingEventSocket struct {
   201  	opens, closes int
   202  }
   203  
   204  func (*countingEventSocket) Listen() error                                              { return nil }
   205  func (*countingEventSocket) Serve(context.Context) error                                { return nil }
   206  func (c *countingEventSocket) FlowCreated(t time.Time, uuid string, id inetdiag.SockID) { c.opens++ }
   207  func (c *countingEventSocket) FlowDeleted(t time.Time, uuid string)                     { c.closes++ }
   208  
   209  func TestHistograms(t *testing.T) {
   210  	dir, err := ioutil.TempDir("", "tcp-info_saver_TestBasic")
   211  	rtx.Must(err, "Could not create tempdir")
   212  	fmt.Println("Directory is:", dir)
   213  	oldDir, err := os.Getwd()
   214  	rtx.Must(err, "Could not get working directory")
   215  	rtx.Must(os.Chdir(dir), "Could not switch to temp dir %s", dir)
   216  	defer func() {
   217  		os.RemoveAll(dir)
   218  		rtx.Must(os.Chdir(oldDir), "Could not switch back to %s", oldDir)
   219  	}()
   220  	eventCounts := &countingEventSocket{}
   221  	anon := anonymize.New(anonymize.None)
   222  	svr := saver.NewSaver("foo", "bar", 1, eventCounts, anon, nil)
   223  	svrChan := make(chan netlink.MessageBlock, 0) // no buffering
   224  	go svr.MessageSaverLoop(svrChan)
   225  
   226  	date := time.Date(2018, 02, 06, 11, 12, 13, 0, time.UTC)
   227  	mb := netlink.MessageBlock{V4Time: date, V6Time: date}
   228  
   229  	// Create basic messages.  These are not internally consistent!!
   230  	m1 := msg(t, 11234, 1).setBytesReceived(0).setBytesSent(0)
   231  	m2 := msg(t, 235, 2).setBytesReceived(1000).setBytesSent(2000)
   232  
   233  	// This round just initializes the cache, and causes the first set of rate histogram observations.
   234  	// Received = 1000, Sent = 2000
   235  	// Results in 2 snapshots, 1 stats observation, (8000 bits received, 16000 bits sent)
   236  	mb.V4Messages = []*netlink.NetlinkMessage{&m1.NetlinkMessage, &m2.NetlinkMessage}
   237  	svrChan <- mb
   238  
   239  	// This closes the second connection at 401 milliseconds.
   240  	mb.V4Messages = []*netlink.NetlinkMessage{&m1.NetlinkMessage}
   241  	mb.V4Time = mb.V4Time.Add(401 * time.Millisecond)
   242  	svrChan <- mb
   243  
   244  	// This modifies the SndMSS field in the first connection.  This causes write to the file, but no stats.
   245  	// 1 snapshot
   246  	m1a := m1.copy().setByte(20, 127)
   247  	mb.V4Messages = []*netlink.NetlinkMessage{&m1a.NetlinkMessage}
   248  	mb.V4Time = mb.V4Time.Add(401 * time.Millisecond)
   249  	svrChan <- mb
   250  
   251  	// This changes the first connection again, increasing the BytesReceived (20000) and BytesSent (10000) fields.
   252  	// This also causes the time to roll over a second boundary, and generate stats histogram observations.
   253  	// 1 snapshot, 1 stats observation (160,000 bits received, 80,000 bits sent)
   254  	m1b := m1a.copy().setBytesReceived(20000).setBytesSent(10000)
   255  	mb.V4Messages = []*netlink.NetlinkMessage{&m1b.NetlinkMessage}
   256  	mb.V4Time = mb.V4Time.Add(401 * time.Millisecond)
   257  	svrChan <- mb
   258  
   259  	// 1 snapshot, no additional stats
   260  	m1c := m1b.copy().setBytesSent(100000) // This increases BytesSent from 10K to 100K.
   261  	mb.V4Messages = []*netlink.NetlinkMessage{&m1c.NetlinkMessage}
   262  	mb.V4Time = mb.V4Time.Add(401 * time.Millisecond)
   263  	svrChan <- mb
   264  
   265  	// This closes the first connection and rolls over another observation boundary.
   266  	// We should see an observation of 0 Received and 720,000 bits Sent.
   267  	// 0 snapshots, 1 stats observation.
   268  	mb.V4Messages = []*netlink.NetlinkMessage{}
   269  	mb.V4Time = mb.V4Time.Add(401 * time.Millisecond)
   270  	svrChan <- mb
   271  
   272  	// Close the channel and wait until all MessageBlocks are finished processing.
   273  	close(svrChan)
   274  	svr.Done.Wait()
   275  
   276  	// This section checks that prom metrics are updated appropriately.
   277  	c := make(chan prometheus.Metric, 10)
   278  
   279  	// There should have been three observations, (16,000, 80,000, 720,000) totalling 816000 bits.
   280  	metrics.SendRateHistogram.Collect(c)
   281  	m := <-c
   282  	if !histContains(m, "sample_count:3") {
   283  		t.Error("Wrong sample count")
   284  	}
   285  	if !histContains(m, "sample_sum:816000") { // 2000 bytes + 100000 bytes
   286  		t.Error("Wrong sample sum")
   287  	}
   288  
   289  	// There should have been three observations, totalling 168000 bits.
   290  	metrics.ReceiveRateHistogram.Collect(c)
   291  	m = <-c
   292  	if !histContains(m, "sample_count:3") {
   293  		t.Error("Wrong sample count")
   294  	}
   295  	if !histContains(m, "sample_sum:168000") { // 1000 bytes + 20000 bytes
   296  		t.Error("Wrong sample sum")
   297  	}
   298  
   299  	// For counts in Received, we expect
   300  	// 1 in bucket 0
   301  	// 2 in bucket 10000
   302  	// 3 in bucket 200000
   303  	if !histContains(m, "cumulative_count:2 upper_bound:10000") {
   304  		t.Error("Wrong count for 10000 bucket")
   305  	}
   306  	if !histContains(m, "cumulative_count:3 upper_bound:251000") {
   307  		t.Error("Wrong bucket count for 200000 bucket")
   308  	}
   309  
   310  	// We should have seen 2 different connections.
   311  	metrics.NewFileCount.Collect(c)
   312  	checkCounter(t, c, 2)
   313  	if eventCounts.opens != 2 || eventCounts.closes != 2 {
   314  		t.Errorf("Should have {opens:2, closes:2} not %+v", *eventCounts)
   315  	}
   316  
   317  	// We should have seen total of 4 snapshots.
   318  	metrics.SnapshotCount.Collect(c)
   319  	checkCounter(t, c, 4)
   320  
   321  	close(c)
   322  
   323  	// We have to use a range-based size verification because different versions of
   324  	// zstd have slightly different compression ratios.
   325  	// The min/max criteria are based on zstd 1.3.8.
   326  	// These may change with different zstd versions.
   327  	verifySizeBetween(t, 380, 500, "2018/02/06/*_0000000000002BE2.00000.jsonl.zst")
   328  	verifySizeBetween(t, 350, 450, "2018/02/06/*_00000000000000EB.00000.jsonl.zst")
   329  }
   330  
   331  // TODO - this file contains connection data from a connection with FIN_WAIT2 and no DiagInfo.
   332  // Need to create fake NetlinkMessage stream, and send to saver, and test behavior.
   333  func TestFinWait2NotImplemented(t *testing.T) {
   334  	source := "testdata/finwait2-sample_1554836592_unsafe_000000000135A272.00000.jsonl.zst"
   335  	rdr := zstd.NewReader(source)
   336  	defer rdr.Close()
   337  	msgs, err := netlink.LoadAllArchivalRecords(rdr)
   338  	if err != nil {
   339  		t.Fatal(err)
   340  	}
   341  
   342  	anon := anonymize.New(anonymize.None)
   343  	svr := saver.NewSaver("hostname", "fakePod", 1, eventsocket.NullServer(), anon, nil)
   344  	blockChan := make(chan netlink.MessageBlock, 0)
   345  	go svr.MessageSaverLoop(blockChan)
   346  	for i := range msgs {
   347  		ar := msgs[i]
   348  		s, r := ar.GetStats()
   349  		// TODO Fill in when we have a way to create NetlinkMessage from ArchiveRecord
   350  		mb := netlink.MessageBlock{}
   351  		blockChan <- mb
   352  
   353  		log.Println(s, r, ar.HasDiagInfo())
   354  	}
   355  
   356  	close(blockChan)
   357  	svr.Done.Wait()
   358  	svr.LogCacheStats(1, 0)
   359  	t.Log("Test not implemented")
   360  }
   361  
   362  // If this compiles, the "test" passes
   363  func assertSaverIsACacheLogger(s *saver.Saver) {
   364  	func(csl saver.CacheLogger) {}(s)
   365  }