github.com/nikandfor/tlog@v0.21.5-0.20231108111739-3ef89426a96d/tlz/compress_test.go (about) 1 package tlz 2 3 import ( 4 "bytes" 5 "encoding/hex" 6 "flag" 7 "io" 8 "io/ioutil" 9 "testing" 10 11 //"github.com/nikandfor/assert" 12 "github.com/nikandfor/errors" 13 "github.com/stretchr/testify/assert" 14 15 "github.com/nikandfor/tlog" 16 "github.com/nikandfor/tlog/low" 17 "github.com/nikandfor/tlog/tlio" 18 "github.com/nikandfor/tlog/tlwire" 19 ) 20 21 var fileFlag = flag.String("test-file", "../log.tlog", "file with tlog logs") 22 23 var ( 24 testData []byte 25 testOff []int 26 testsCount int 27 ) 28 29 func TestFileMagic(t *testing.T) { 30 var buf low.Buf 31 32 w := NewEncoder(&buf, MiB) 33 34 _, err := w.Write([]byte{}) 35 assert.NoError(t, err) 36 37 if assert.True(t, len(buf) >= len(FileMagic)) { 38 assert.Equal(t, FileMagic, string(buf[:len(FileMagic)])) 39 } 40 } 41 42 func TestLiteral(t *testing.T) { 43 const B = 32 44 45 var buf low.Buf 46 47 w := newEncoder(&buf, B, 1) 48 49 n, err := w.Write([]byte("very_first_message")) 50 assert.Equal(t, 18, n) 51 assert.NoError(t, err) 52 53 t.Logf("buf pos %x ht %x\n%v", w.pos, w.ht, hex.Dump(w.block)) 54 t.Logf("res\n%v", hex.Dump(buf)) 55 t.Logf("res\n%v", Dump(buf)) 56 57 r := &Decoder{ 58 b: buf, 59 } 60 61 p := make([]byte, 100) 62 63 t.Logf("*** read back ***") 64 65 n, err = r.Read(p[:10]) 66 assert.Equal(t, 10, n) 67 assert.NoError(t, err) 68 assert.Equal(t, []byte("very_first"), p[:n]) 69 70 copy(p[:10], zeros) 71 72 n, err = r.Read(p[:10]) 73 assert.Equal(t, 8, n) 74 assert.Equal(t, io.EOF, err) 75 assert.Equal(t, []byte("_message"), p[:n]) 76 } 77 78 func TestCopy(t *testing.T) { 79 const B = 32 80 81 var buf low.Buf 82 83 w := newEncoder(&buf, B, 1) 84 85 st := 0 86 87 n, err := w.Write([]byte("prefix_1234_suffix")) 88 assert.Equal(t, 18, n) 89 assert.NoError(t, err) 90 91 t.Logf("buf pos %x ht %x\n%v", w.pos, w.ht, hex.Dump(w.block)) 92 t.Logf("res\n%v", hex.Dump(buf[st:])) 93 94 st = len(buf) 95 96 n, err = w.Write([]byte("prefix_567_suffix")) 97 assert.Equal(t, 17, n) 98 assert.NoError(t, err) 99 100 t.Logf("buf pos %x ht %x\n%v", w.pos, w.ht, hex.Dump(w.block)) 101 t.Logf("res\n%v", hex.Dump(buf[st:])) 102 103 r := &Decoder{ 104 b: buf, 105 } 106 107 p := make([]byte, 100) 108 109 t.Logf("*** read back ***") 110 111 n, err = r.Read(p[:10]) 112 assert.Equal(t, 10, n) 113 assert.NoError(t, err) 114 assert.Equal(t, []byte("prefix_123"), p[:n]) 115 116 t.Logf("buf pos %x\n%v", r.pos, hex.Dump(r.block)) 117 118 n, err = r.Read(p[:10]) 119 assert.Equal(t, 10, n) 120 assert.NoError(t, err) 121 assert.Equal(t, []byte("4_suffixpr"), p[:n]) 122 123 t.Logf("buf pos %x\n%v", r.pos, hex.Dump(r.block)) 124 125 n, err = r.Read(p[:30]) 126 assert.Equal(t, 15, n) 127 assert.Equal(t, io.EOF, err) 128 assert.Equal(t, []byte("efix_567_suffix"), p[:n]) 129 130 t.Logf("buf pos %x\n%v", r.pos, hex.Dump(r.block)) 131 132 // t.Logf("compression ratio: %.3f", float64(18+17)/float64(len(buf))) 133 } 134 135 func TestDumpOnelineText(t *testing.T) { 136 t.Skip() 137 138 var dump, text low.Buf 139 140 d := NewDumper(&dump) 141 e := newEncoder(d, 1*1024, 2) 142 143 cw := tlog.NewConsoleWriter(tlio.NewMultiWriter(e, &text), tlog.LstdFlags) 144 145 l := tlog.New(cw) 146 tr := l.Start("span_name") 147 148 types := []string{"type_a", "value_b", "qweqew", "asdads"} 149 150 for i := 0; i < 20; i++ { 151 // tr := l.Start("span_name") 152 tr.Printw("some example message", "i", i, "type", types[i%len(types)]) 153 // tr.Finish() 154 } 155 156 t.Logf("text:\n%s", text) 157 t.Logf("dump:\n%s", dump) 158 } 159 160 func TestBug1(t *testing.T) { 161 // tl = tlog.NewTestLogger(t, "", nil) 162 // tlog.DefaultLogger = tl 163 164 var b bytes.Buffer 165 166 p := make([]byte, 1000) 167 d := NewDecoder(&b) 168 169 // tl.Printw("first") 170 171 _, _ = b.Write([]byte{Literal | Meta, MetaReset | 0, 4}) 172 _, _ = b.Write([]byte{Literal | 3, 0x94, 0xa8, 0xfb, Copy | 9}) 173 174 n, err := d.Read(p) 175 assert.ErrorIs(t, err, io.ErrUnexpectedEOF) 176 assert.Equal(t, 3, n) 177 178 // tl.Printw("second") 179 180 _, _ = b.Write([]byte{0xfd, 0x03, 0x65}) // offset 181 182 n, err = d.Read(p) 183 assert.ErrorIs(t, err, io.EOF) 184 assert.Equal(t, 9, n) 185 } 186 187 func TestOnFile(t *testing.T) { 188 err := loadTestFile(t, *fileFlag) 189 if err != nil { 190 t.Skipf("loading data: %v", err) 191 } 192 193 var encoded bytes.Buffer 194 var full bytes.Buffer 195 w := NewEncoderHTSize(tlio.NewMultiWriter(&encoded, &full), 512, 256) 196 r := NewDecoder(&encoded) 197 var buf []byte 198 199 // dumper := tlwire.NewDumper(os.Stderr) 200 201 for i := 0; i < testsCount; i++ { 202 msg := testData[testOff[i]:testOff[i+1]] 203 204 // _, _ = dumper.Write(msg) 205 206 n, err := w.Write(msg) 207 assert.NoError(t, err) 208 assert.Equal(t, len(msg), n) 209 210 for n > len(buf) { 211 buf = append(buf[:cap(buf)], 0, 0, 0, 0, 0, 0, 0, 0) 212 } 213 214 n, err = r.Read(buf[:n]) 215 assert.NoError(t, err) 216 assert.Equal(t, len(msg), n) 217 218 assert.Equal(t, msg, []byte(buf[:n])) 219 220 if t.Failed() { 221 break 222 } 223 } 224 225 r.Reset(&full) 226 buf = buf[:0] 227 228 var dec bytes.Buffer 229 230 n, err := io.Copy(&dec, r) 231 assert.NoError(t, err) 232 assert.Equal(t, int(n), dec.Len()) 233 234 min := dec.Len() 235 assert.Equal(t, testData[:min], dec.Bytes()) 236 237 // t.Logf("metrics: %v bytes %v events %v", mm, dec.Len(), testsCount) 238 } 239 240 func BenchmarkLogCompressOneline(b *testing.B) { 241 b.ReportAllocs() 242 243 var full, small tlio.CountingIODiscard 244 w := NewEncoder(&small, 128*1024) 245 246 l := tlog.New(io.MultiWriter(&full, w)) 247 tr := l.Start("span_name") 248 249 types := []string{"type_a", "value_b", "qweqew", "asdads"} 250 251 for i := 0; i < b.N; i++ { 252 // tr := l.Start("span_name") 253 tr.Printw("some example message", "i", i, "type", types[i%len(types)]) 254 // tr.Finish() 255 } 256 257 b.SetBytes(full.Bytes.Load() / int64(b.N)) 258 b.ReportMetric(float64(full.Bytes.Load())/float64(small.Bytes.Load()), "ratio") 259 } 260 261 func BenchmarkLogCompressOnelineText(b *testing.B) { 262 b.ReportAllocs() 263 264 var full, small tlio.CountingIODiscard 265 w := NewEncoder(&small, 128*1024) 266 cw := tlog.NewConsoleWriter(io.MultiWriter(&full, w), tlog.LstdFlags) 267 268 l := tlog.New(cw) 269 tr := l.Start("span_name") 270 271 types := []string{"type_a", "value_b", "qweqew", "asdads"} 272 273 for i := 0; i < b.N; i++ { 274 // tr := l.Start("span_name") 275 tr.Printw("some example message", "i", i, "type", types[i%len(types)]) 276 // tr.Finish() 277 } 278 279 b.SetBytes(full.Bytes.Load() / int64(b.N)) 280 b.ReportMetric(float64(full.Bytes.Load())/float64(small.Bytes.Load()), "ratio") 281 } 282 283 const BlockSize, HTSize = 1024 * 1024, 16 * 1024 284 285 func BenchmarkEncodeFile(b *testing.B) { 286 err := loadTestFile(b, *fileFlag) 287 if err != nil { 288 b.Skipf("loading data: %v", err) 289 } 290 291 b.ReportAllocs() 292 b.ResetTimer() 293 294 var c tlio.CountingIODiscard 295 w := NewEncoderHTSize(&c, BlockSize, HTSize) 296 297 // b.Logf("block %x ht %x (%x * %x)", len(w.block), len(w.ht)*int(unsafe.Sizeof(w.ht[0])), len(w.ht), unsafe.Sizeof(w.ht[0])) 298 299 written := 0 300 for i := 0; i < b.N; i++ { 301 j := i % testsCount 302 msg := testData[testOff[j]:testOff[j+1]] 303 304 n, err := w.Write(msg) 305 if err != nil { 306 b.Fatalf("write: %v", err) 307 } 308 if n != len(msg) { 309 b.Fatalf("write %v of %v", n, len(msg)) 310 } 311 312 written += n 313 } 314 315 // b.Logf("total written: %x %x", w.pos, w.pos/len(w.block)) 316 317 b.ReportMetric(float64(written)/float64(c.Bytes.Load()), "ratio") 318 // b.ReportMetric(float64(c.Operations)/float64(b.N), "writes/op") 319 b.SetBytes(int64(written / b.N)) 320 } 321 322 func BenchmarkDecodeFile(b *testing.B) { 323 err := loadTestFile(b, *fileFlag) 324 if err != nil { 325 b.Skipf("loading data: %v", err) 326 } 327 328 encoded := make(low.Buf, 0, len(testData)/2) 329 w := NewEncoderHTSize(&encoded, BlockSize, HTSize) 330 331 const limit = 20000 332 333 written := 0 334 for i := 0; i < testsCount && i < limit; i++ { 335 j := i % testsCount 336 msg := testData[testOff[j]:testOff[j+1]] 337 338 n, err := w.Write(msg) 339 if err != nil { 340 b.Fatalf("write: %v", err) 341 } 342 if n != len(msg) { 343 b.Fatalf("write %v of %v", n, len(msg)) 344 } 345 346 written += n 347 } 348 349 b.ReportAllocs() 350 b.ResetTimer() 351 352 b.ReportMetric(float64(written)/float64(len(encoded)), "ratio") 353 354 // var decoded []byte 355 decoded := make(low.Buf, 0, len(testData)) 356 buf := make([]byte, 4096) 357 r := NewDecoderBytes(encoded) 358 359 for i := 0; i < b.N/testsCount; i++ { 360 r.ResetBytes(encoded) 361 decoded = decoded[:0] 362 363 _, err = io.CopyBuffer(&decoded, r, buf) 364 assert.NoError(b, err) 365 } 366 367 // b.Logf("decoded %x", len(decoded)) 368 369 b.SetBytes(int64(decoded.Len() / testsCount)) 370 371 min := len(testData) 372 if min > decoded.Len() { 373 min = decoded.Len() 374 } 375 assert.Equal(b, testData[:min], decoded.Bytes()) 376 } 377 378 func loadTestFile(tb testing.TB, f string) (err error) { 379 tb.Helper() 380 381 if testData != nil { 382 return 383 } 384 385 testData, err = ioutil.ReadFile(f) 386 if err != nil { 387 return errors.Wrap(err, "open data file") 388 } 389 390 var d tlwire.Decoder 391 testOff = make([]int, 0, len(testData)/100) 392 393 var st int 394 for st < len(testData) { 395 testOff = append(testOff, st) 396 st = d.Skip(testData, st) 397 } 398 testsCount = len(testOff) 399 testOff = append(testOff, st) 400 401 tb.Logf("events loaded: %v", testsCount) 402 403 return 404 } 405 406 func FuzzEncoder(f *testing.F) { 407 f.Add( 408 []byte("prefix_1234_suffix"), 409 []byte("prefix_567_suffix"), 410 []byte("suffix_prefix"), 411 ) 412 413 f.Add( 414 []byte("aaaaaa"), 415 []byte("aaaaaaaaaaaa"), 416 []byte("aaaaaaaaaaaaaaaaaaaaaaaa"), 417 ) 418 419 f.Add( 420 []byte("aaaaab"), 421 []byte("aaaaabaaaaaa"), 422 []byte("aaaaaaaaaaabaaaaaaaaaaaa"), 423 ) 424 425 var ebuf, dbuf bytes.Buffer 426 buf := make([]byte, 16) 427 428 e := NewEncoderHTSize(&ebuf, 512, 32) 429 d := NewDecoder(&dbuf) 430 431 f.Fuzz(func(t *testing.T, p0, p1, p2 []byte) { 432 e.Reset(e.Writer) 433 ebuf.Reset() 434 435 for _, p := range [][]byte{p0, p1, p2} { 436 n, err := e.Write(p) 437 assert.NoError(t, err) 438 assert.Equal(t, len(p), n) 439 } 440 441 d.ResetBytes(ebuf.Bytes()) 442 dbuf.Reset() 443 444 m, err := io.CopyBuffer(&dbuf, d, buf) 445 assert.NoError(t, err) 446 assert.Equal(t, len(p0)+len(p1)+len(p2), int(m)) 447 448 i := 0 449 for _, p := range [][]byte{p0, p1, p2} { 450 assert.Equal(t, p, dbuf.Bytes()[i:i+len(p)]) 451 i += len(p) 452 } 453 454 assert.Equal(t, int(m), i) 455 456 if !t.Failed() { 457 return 458 } 459 460 for i, p := range [][]byte{p0, p1, p2} { 461 t.Logf("p%d\n%s", i, hex.Dump(p)) 462 } 463 464 t.Logf("encoded dump\n%s", Dump(ebuf.Bytes())) 465 }) 466 }