github.com/m-lab/tcp-info@v1.9.0/saver/saver_test.go (about) 1 package saver_test 2 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 "io/ioutil" 8 "log" 9 "math" 10 "os" 11 "path/filepath" 12 "runtime" 13 "strings" 14 "testing" 15 "time" 16 17 "github.com/m-lab/go/anonymize" 18 19 "github.com/m-lab/tcp-info/eventsocket" 20 21 "github.com/m-lab/go/rtx" 22 "github.com/m-lab/tcp-info/inetdiag" 23 "github.com/m-lab/tcp-info/metrics" 24 "github.com/m-lab/tcp-info/netlink" 25 "github.com/m-lab/tcp-info/saver" 26 "github.com/m-lab/tcp-info/zstd" 27 28 "github.com/prometheus/client_golang/prometheus" 29 dto "github.com/prometheus/client_model/go" 30 ) 31 32 // TODO Tests: 33 // File closing. 34 // Marshaller selection. 35 // Rotation (use 1 second rotation time) 36 37 func init() { 38 // Always prepend the filename and line number. 39 log.SetFlags(log.LstdFlags | log.Lshortfile) 40 } 41 42 func dump(t *testing.T, mp *netlink.ArchivalRecord) { 43 for i := range mp.Attributes { 44 a := mp.Attributes[i] 45 if a != nil { 46 t.Logf("%d %d %+v\n", i, len(a), a) 47 } 48 } 49 } 50 51 type TestMsg struct { 52 netlink.NetlinkMessage 53 } 54 55 func (msg *TestMsg) copy() *TestMsg { 56 out := TestMsg{} 57 out.Header = msg.Header 58 out.Data = make([]byte, len(msg.Data)) 59 copy(out.Data, msg.Data) 60 return &out 61 } 62 63 func (msg *TestMsg) setCookie(cookie uint64) *TestMsg { 64 raw, _ := inetdiag.SplitInetDiagMsg(msg.Data) 65 if raw == nil { 66 panic("setCookie failed") 67 } 68 idm, err := raw.Parse() 69 if err != nil { 70 panic("setCookie failed") 71 } 72 for i := 0; i < 8; i++ { 73 idm.ID.IDiagCookie[i] = byte(cookie & 0x0FF) 74 cookie >>= 8 75 } 76 77 return msg 78 } 79 func (msg *TestMsg) setDPort(dport uint16) *TestMsg { 80 raw, _ := inetdiag.SplitInetDiagMsg(msg.Data) 81 if raw == nil { 82 panic("setCookie failed") 83 } 84 idm, err := raw.Parse() 85 if err != nil { 86 panic("setCookie failed") 87 } 88 for i := 0; i < 2; i++ { 89 idm.ID.IDiagDPort[i] = byte(dport & 0x0FF) 90 dport >>= 8 91 } 92 93 return msg 94 } 95 96 func (msg *TestMsg) mustAR() *netlink.ArchivalRecord { 97 ar, err := netlink.MakeArchivalRecord(&msg.NetlinkMessage, nil) 98 if err != nil { 99 panic(err) 100 } 101 if ar == nil { 102 panic("nil ar - probably a local connection") 103 } 104 105 if len(ar.Attributes) <= inetdiag.INET_DIAG_INFO { 106 panic("No INET_DIAG_INFO message") 107 } 108 109 return ar 110 } 111 112 func (msg *TestMsg) setByte(offset int, value byte) *TestMsg { 113 ar := msg.mustAR() 114 ar.Attributes[inetdiag.INET_DIAG_INFO][offset] = value 115 116 return msg 117 } 118 119 func msg(t *testing.T, cookie uint64, dport uint16) *TestMsg { 120 // NOTE: If the INET_DIAG_INFO message gets larger, this may cause unit tests to fail, because 121 // the TestMsg assumes the message decoding is done in place. 122 var json1 = `{"Header":{"Len":420,"Type":20,"Flags":2,"Seq":1,"Pid":235855},"Data":"CgECAIaYE6cmIAAAEAMEFkrF0ry7OloFJgf4sEAMDAYAAAAAAAAAgQAAAABI6AcBAAAAAJgmAAAAAAAAAAAAAAAAAACsINMLBQAIAAAAAAAFAAUAIAAAAAUABgAgAAAAFAABAAAAAAAAAAAAAAAAAAAAAAAoAAcAAAAAAICiBQAAAAAAALQAAAAAAAAAAAAAAAAAAAAAAAAAAAAA5AACAAEAAAAAB3gBYFsDAECcAAB2BQAAGAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAA2BEAAAAAAACEEQAAyBEAANwFAABAgQAAL0gAACEAAAAHAAAACgAAAJQFAAADAAAAAAAAAIBwAAAAAAAAQdoNAAAAAAD///////////4zAAAAAAAADhAAAAAAAADgAAAA4QAAAAAAAADYRgAAJgAAAC8AAACi4gYAAAAAAGArCwAAAAAAAAAAAAAAAAAAAAAAAAAAADAAAAAAAAAA/TMAAAAAAAAAAAAAAAAAAAAAAAAAAAAACgAEAGN1YmljAAAACAARAAAAAAA="}` 123 nm := netlink.NetlinkMessage{} 124 err := json.Unmarshal([]byte(json1), &nm) 125 if err != nil { 126 t.Log(err) 127 return nil 128 } 129 130 msg := &TestMsg{nm} 131 132 msg = msg.setCookie(cookie).setDPort(dport) 133 return msg 134 } 135 136 func (msg *TestMsg) setBytesSent(value uint64) *TestMsg { 137 ar := msg.mustAR() 138 ar.SetBytesSent(value) 139 return msg 140 } 141 142 func (msg *TestMsg) setBytesReceived(value uint64) *TestMsg { 143 ar := msg.mustAR() 144 ar.SetBytesReceived(value) 145 return msg 146 } 147 148 func verifySizeBetween(t *testing.T, minSize, maxSize int64, pattern string) { 149 names, err := filepath.Glob(pattern) 150 rtx.Must(err, "Could not Glob pattern %s", pattern) 151 if len(names) != 1 { 152 t.Fatal("The glob", pattern, "should return exactly one file, not", len(names)) 153 } 154 filename := names[0] 155 info, err := os.Stat(filename) 156 if err != nil { 157 t.Fatal(err) 158 } 159 if info.Size() < minSize || info.Size() > maxSize { 160 _, file, line, _ := runtime.Caller(1) 161 t.Error("Size of", filename, " (", info.Size(), ") is out of bounds. We expect", minSize, "<=", info.Size(), "<=", maxSize, "at", file, line, ".") 162 } 163 } 164 165 func histContains(m prometheus.Metric, s string) bool { 166 var mm dto.Metric 167 m.Write(&mm) 168 h := mm.GetHistogram() 169 if h == nil { 170 log.Println(h) 171 return false 172 } 173 if strings.Contains(h.String(), s) { 174 return true 175 } 176 log.Println("Expected:", s, "Got:", h) 177 return false 178 } 179 180 func checkCounter(t *testing.T, c chan prometheus.Metric, expected float64) { 181 m := <-c 182 v := counterValue(m) 183 if v != expected { 184 t.Error("For", m.Desc(), "expected:", expected, "got:", v) 185 } 186 } 187 188 func counterValue(m prometheus.Metric) float64 { 189 var mm dto.Metric 190 m.Write(&mm) 191 ctr := mm.GetCounter() 192 if ctr == nil { 193 log.Println(mm.GetUntyped()) 194 return math.Inf(-1) 195 } 196 197 return *ctr.Value 198 } 199 200 type countingEventSocket struct { 201 opens, closes int 202 } 203 204 func (*countingEventSocket) Listen() error { return nil } 205 func (*countingEventSocket) Serve(context.Context) error { return nil } 206 func (c *countingEventSocket) FlowCreated(t time.Time, uuid string, id inetdiag.SockID) { c.opens++ } 207 func (c *countingEventSocket) FlowDeleted(t time.Time, uuid string) { c.closes++ } 208 209 func TestHistograms(t *testing.T) { 210 dir, err := ioutil.TempDir("", "tcp-info_saver_TestBasic") 211 rtx.Must(err, "Could not create tempdir") 212 fmt.Println("Directory is:", dir) 213 oldDir, err := os.Getwd() 214 rtx.Must(err, "Could not get working directory") 215 rtx.Must(os.Chdir(dir), "Could not switch to temp dir %s", dir) 216 defer func() { 217 os.RemoveAll(dir) 218 rtx.Must(os.Chdir(oldDir), "Could not switch back to %s", oldDir) 219 }() 220 eventCounts := &countingEventSocket{} 221 anon := anonymize.New(anonymize.None) 222 svr := saver.NewSaver("foo", "bar", 1, eventCounts, anon, nil) 223 svrChan := make(chan netlink.MessageBlock, 0) // no buffering 224 go svr.MessageSaverLoop(svrChan) 225 226 date := time.Date(2018, 02, 06, 11, 12, 13, 0, time.UTC) 227 mb := netlink.MessageBlock{V4Time: date, V6Time: date} 228 229 // Create basic messages. These are not internally consistent!! 230 m1 := msg(t, 11234, 1).setBytesReceived(0).setBytesSent(0) 231 m2 := msg(t, 235, 2).setBytesReceived(1000).setBytesSent(2000) 232 233 // This round just initializes the cache, and causes the first set of rate histogram observations. 234 // Received = 1000, Sent = 2000 235 // Results in 2 snapshots, 1 stats observation, (8000 bits received, 16000 bits sent) 236 mb.V4Messages = []*netlink.NetlinkMessage{&m1.NetlinkMessage, &m2.NetlinkMessage} 237 svrChan <- mb 238 239 // This closes the second connection at 401 milliseconds. 240 mb.V4Messages = []*netlink.NetlinkMessage{&m1.NetlinkMessage} 241 mb.V4Time = mb.V4Time.Add(401 * time.Millisecond) 242 svrChan <- mb 243 244 // This modifies the SndMSS field in the first connection. This causes write to the file, but no stats. 245 // 1 snapshot 246 m1a := m1.copy().setByte(20, 127) 247 mb.V4Messages = []*netlink.NetlinkMessage{&m1a.NetlinkMessage} 248 mb.V4Time = mb.V4Time.Add(401 * time.Millisecond) 249 svrChan <- mb 250 251 // This changes the first connection again, increasing the BytesReceived (20000) and BytesSent (10000) fields. 252 // This also causes the time to roll over a second boundary, and generate stats histogram observations. 253 // 1 snapshot, 1 stats observation (160,000 bits received, 80,000 bits sent) 254 m1b := m1a.copy().setBytesReceived(20000).setBytesSent(10000) 255 mb.V4Messages = []*netlink.NetlinkMessage{&m1b.NetlinkMessage} 256 mb.V4Time = mb.V4Time.Add(401 * time.Millisecond) 257 svrChan <- mb 258 259 // 1 snapshot, no additional stats 260 m1c := m1b.copy().setBytesSent(100000) // This increases BytesSent from 10K to 100K. 261 mb.V4Messages = []*netlink.NetlinkMessage{&m1c.NetlinkMessage} 262 mb.V4Time = mb.V4Time.Add(401 * time.Millisecond) 263 svrChan <- mb 264 265 // This closes the first connection and rolls over another observation boundary. 266 // We should see an observation of 0 Received and 720,000 bits Sent. 267 // 0 snapshots, 1 stats observation. 268 mb.V4Messages = []*netlink.NetlinkMessage{} 269 mb.V4Time = mb.V4Time.Add(401 * time.Millisecond) 270 svrChan <- mb 271 272 // Close the channel and wait until all MessageBlocks are finished processing. 273 close(svrChan) 274 svr.Done.Wait() 275 276 // This section checks that prom metrics are updated appropriately. 277 c := make(chan prometheus.Metric, 10) 278 279 // There should have been three observations, (16,000, 80,000, 720,000) totalling 816000 bits. 280 metrics.SendRateHistogram.Collect(c) 281 m := <-c 282 if !histContains(m, "sample_count:3") { 283 t.Error("Wrong sample count") 284 } 285 if !histContains(m, "sample_sum:816000") { // 2000 bytes + 100000 bytes 286 t.Error("Wrong sample sum") 287 } 288 289 // There should have been three observations, totalling 168000 bits. 290 metrics.ReceiveRateHistogram.Collect(c) 291 m = <-c 292 if !histContains(m, "sample_count:3") { 293 t.Error("Wrong sample count") 294 } 295 if !histContains(m, "sample_sum:168000") { // 1000 bytes + 20000 bytes 296 t.Error("Wrong sample sum") 297 } 298 299 // For counts in Received, we expect 300 // 1 in bucket 0 301 // 2 in bucket 10000 302 // 3 in bucket 200000 303 if !histContains(m, "cumulative_count:2 upper_bound:10000") { 304 t.Error("Wrong count for 10000 bucket") 305 } 306 if !histContains(m, "cumulative_count:3 upper_bound:251000") { 307 t.Error("Wrong bucket count for 200000 bucket") 308 } 309 310 // We should have seen 2 different connections. 311 metrics.NewFileCount.Collect(c) 312 checkCounter(t, c, 2) 313 if eventCounts.opens != 2 || eventCounts.closes != 2 { 314 t.Errorf("Should have {opens:2, closes:2} not %+v", *eventCounts) 315 } 316 317 // We should have seen total of 4 snapshots. 318 metrics.SnapshotCount.Collect(c) 319 checkCounter(t, c, 4) 320 321 close(c) 322 323 // We have to use a range-based size verification because different versions of 324 // zstd have slightly different compression ratios. 325 // The min/max criteria are based on zstd 1.3.8. 326 // These may change with different zstd versions. 327 verifySizeBetween(t, 380, 500, "2018/02/06/*_0000000000002BE2.00000.jsonl.zst") 328 verifySizeBetween(t, 350, 450, "2018/02/06/*_00000000000000EB.00000.jsonl.zst") 329 } 330 331 // TODO - this file contains connection data from a connection with FIN_WAIT2 and no DiagInfo. 332 // Need to create fake NetlinkMessage stream, and send to saver, and test behavior. 333 func TestFinWait2NotImplemented(t *testing.T) { 334 source := "testdata/finwait2-sample_1554836592_unsafe_000000000135A272.00000.jsonl.zst" 335 rdr := zstd.NewReader(source) 336 defer rdr.Close() 337 msgs, err := netlink.LoadAllArchivalRecords(rdr) 338 if err != nil { 339 t.Fatal(err) 340 } 341 342 anon := anonymize.New(anonymize.None) 343 svr := saver.NewSaver("hostname", "fakePod", 1, eventsocket.NullServer(), anon, nil) 344 blockChan := make(chan netlink.MessageBlock, 0) 345 go svr.MessageSaverLoop(blockChan) 346 for i := range msgs { 347 ar := msgs[i] 348 s, r := ar.GetStats() 349 // TODO Fill in when we have a way to create NetlinkMessage from ArchiveRecord 350 mb := netlink.MessageBlock{} 351 blockChan <- mb 352 353 log.Println(s, r, ar.HasDiagInfo()) 354 } 355 356 close(blockChan) 357 svr.Done.Wait() 358 svr.LogCacheStats(1, 0) 359 t.Log("Test not implemented") 360 } 361 362 // If this compiles, the "test" passes 363 func assertSaverIsACacheLogger(s *saver.Saver) { 364 func(csl saver.CacheLogger) {}(s) 365 }