github.com/nikandfor/tlog@v0.21.5-0.20231108111739-3ef89426a96d/tlz/compress_test.go (about)

     1  package tlz
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/hex"
     6  	"flag"
     7  	"io"
     8  	"io/ioutil"
     9  	"testing"
    10  
    11  	//"github.com/nikandfor/assert"
    12  	"github.com/nikandfor/errors"
    13  	"github.com/stretchr/testify/assert"
    14  
    15  	"github.com/nikandfor/tlog"
    16  	"github.com/nikandfor/tlog/low"
    17  	"github.com/nikandfor/tlog/tlio"
    18  	"github.com/nikandfor/tlog/tlwire"
    19  )
    20  
    21  var fileFlag = flag.String("test-file", "../log.tlog", "file with tlog logs")
    22  
    23  var (
    24  	testData   []byte
    25  	testOff    []int
    26  	testsCount int
    27  )
    28  
    29  func TestFileMagic(t *testing.T) {
    30  	var buf low.Buf
    31  
    32  	w := NewEncoder(&buf, MiB)
    33  
    34  	_, err := w.Write([]byte{})
    35  	assert.NoError(t, err)
    36  
    37  	if assert.True(t, len(buf) >= len(FileMagic)) {
    38  		assert.Equal(t, FileMagic, string(buf[:len(FileMagic)]))
    39  	}
    40  }
    41  
    42  func TestLiteral(t *testing.T) {
    43  	const B = 32
    44  
    45  	var buf low.Buf
    46  
    47  	w := newEncoder(&buf, B, 1)
    48  
    49  	n, err := w.Write([]byte("very_first_message"))
    50  	assert.Equal(t, 18, n)
    51  	assert.NoError(t, err)
    52  
    53  	t.Logf("buf pos %x ht %x\n%v", w.pos, w.ht, hex.Dump(w.block))
    54  	t.Logf("res\n%v", hex.Dump(buf))
    55  	t.Logf("res\n%v", Dump(buf))
    56  
    57  	r := &Decoder{
    58  		b: buf,
    59  	}
    60  
    61  	p := make([]byte, 100)
    62  
    63  	t.Logf("*** read back ***")
    64  
    65  	n, err = r.Read(p[:10])
    66  	assert.Equal(t, 10, n)
    67  	assert.NoError(t, err)
    68  	assert.Equal(t, []byte("very_first"), p[:n])
    69  
    70  	copy(p[:10], zeros)
    71  
    72  	n, err = r.Read(p[:10])
    73  	assert.Equal(t, 8, n)
    74  	assert.Equal(t, io.EOF, err)
    75  	assert.Equal(t, []byte("_message"), p[:n])
    76  }
    77  
    78  func TestCopy(t *testing.T) {
    79  	const B = 32
    80  
    81  	var buf low.Buf
    82  
    83  	w := newEncoder(&buf, B, 1)
    84  
    85  	st := 0
    86  
    87  	n, err := w.Write([]byte("prefix_1234_suffix"))
    88  	assert.Equal(t, 18, n)
    89  	assert.NoError(t, err)
    90  
    91  	t.Logf("buf pos %x ht %x\n%v", w.pos, w.ht, hex.Dump(w.block))
    92  	t.Logf("res\n%v", hex.Dump(buf[st:]))
    93  
    94  	st = len(buf)
    95  
    96  	n, err = w.Write([]byte("prefix_567_suffix"))
    97  	assert.Equal(t, 17, n)
    98  	assert.NoError(t, err)
    99  
   100  	t.Logf("buf  pos %x ht %x\n%v", w.pos, w.ht, hex.Dump(w.block))
   101  	t.Logf("res\n%v", hex.Dump(buf[st:]))
   102  
   103  	r := &Decoder{
   104  		b: buf,
   105  	}
   106  
   107  	p := make([]byte, 100)
   108  
   109  	t.Logf("*** read back ***")
   110  
   111  	n, err = r.Read(p[:10])
   112  	assert.Equal(t, 10, n)
   113  	assert.NoError(t, err)
   114  	assert.Equal(t, []byte("prefix_123"), p[:n])
   115  
   116  	t.Logf("buf  pos %x\n%v", r.pos, hex.Dump(r.block))
   117  
   118  	n, err = r.Read(p[:10])
   119  	assert.Equal(t, 10, n)
   120  	assert.NoError(t, err)
   121  	assert.Equal(t, []byte("4_suffixpr"), p[:n])
   122  
   123  	t.Logf("buf  pos %x\n%v", r.pos, hex.Dump(r.block))
   124  
   125  	n, err = r.Read(p[:30])
   126  	assert.Equal(t, 15, n)
   127  	assert.Equal(t, io.EOF, err)
   128  	assert.Equal(t, []byte("efix_567_suffix"), p[:n])
   129  
   130  	t.Logf("buf  pos %x\n%v", r.pos, hex.Dump(r.block))
   131  
   132  	//	t.Logf("compression ratio: %.3f", float64(18+17)/float64(len(buf)))
   133  }
   134  
   135  func TestDumpOnelineText(t *testing.T) {
   136  	t.Skip()
   137  
   138  	var dump, text low.Buf
   139  
   140  	d := NewDumper(&dump)
   141  	e := newEncoder(d, 1*1024, 2)
   142  
   143  	cw := tlog.NewConsoleWriter(tlio.NewMultiWriter(e, &text), tlog.LstdFlags)
   144  
   145  	l := tlog.New(cw)
   146  	tr := l.Start("span_name")
   147  
   148  	types := []string{"type_a", "value_b", "qweqew", "asdads"}
   149  
   150  	for i := 0; i < 20; i++ {
   151  		//	tr := l.Start("span_name")
   152  		tr.Printw("some example message", "i", i, "type", types[i%len(types)])
   153  		//	tr.Finish()
   154  	}
   155  
   156  	t.Logf("text:\n%s", text)
   157  	t.Logf("dump:\n%s", dump)
   158  }
   159  
   160  func TestBug1(t *testing.T) {
   161  	//	tl = tlog.NewTestLogger(t, "", nil)
   162  	//	tlog.DefaultLogger = tl
   163  
   164  	var b bytes.Buffer
   165  
   166  	p := make([]byte, 1000)
   167  	d := NewDecoder(&b)
   168  
   169  	//	tl.Printw("first")
   170  
   171  	_, _ = b.Write([]byte{Literal | Meta, MetaReset | 0, 4})
   172  	_, _ = b.Write([]byte{Literal | 3, 0x94, 0xa8, 0xfb, Copy | 9})
   173  
   174  	n, err := d.Read(p)
   175  	assert.ErrorIs(t, err, io.ErrUnexpectedEOF)
   176  	assert.Equal(t, 3, n)
   177  
   178  	//	tl.Printw("second")
   179  
   180  	_, _ = b.Write([]byte{0xfd, 0x03, 0x65}) // offset
   181  
   182  	n, err = d.Read(p)
   183  	assert.ErrorIs(t, err, io.EOF)
   184  	assert.Equal(t, 9, n)
   185  }
   186  
   187  func TestOnFile(t *testing.T) {
   188  	err := loadTestFile(t, *fileFlag)
   189  	if err != nil {
   190  		t.Skipf("loading data: %v", err)
   191  	}
   192  
   193  	var encoded bytes.Buffer
   194  	var full bytes.Buffer
   195  	w := NewEncoderHTSize(tlio.NewMultiWriter(&encoded, &full), 512, 256)
   196  	r := NewDecoder(&encoded)
   197  	var buf []byte
   198  
   199  	//	dumper := tlwire.NewDumper(os.Stderr)
   200  
   201  	for i := 0; i < testsCount; i++ {
   202  		msg := testData[testOff[i]:testOff[i+1]]
   203  
   204  		//	_, _ = dumper.Write(msg)
   205  
   206  		n, err := w.Write(msg)
   207  		assert.NoError(t, err)
   208  		assert.Equal(t, len(msg), n)
   209  
   210  		for n > len(buf) {
   211  			buf = append(buf[:cap(buf)], 0, 0, 0, 0, 0, 0, 0, 0)
   212  		}
   213  
   214  		n, err = r.Read(buf[:n])
   215  		assert.NoError(t, err)
   216  		assert.Equal(t, len(msg), n)
   217  
   218  		assert.Equal(t, msg, []byte(buf[:n]))
   219  
   220  		if t.Failed() {
   221  			break
   222  		}
   223  	}
   224  
   225  	r.Reset(&full)
   226  	buf = buf[:0]
   227  
   228  	var dec bytes.Buffer
   229  
   230  	n, err := io.Copy(&dec, r)
   231  	assert.NoError(t, err)
   232  	assert.Equal(t, int(n), dec.Len())
   233  
   234  	min := dec.Len()
   235  	assert.Equal(t, testData[:min], dec.Bytes())
   236  
   237  	//	t.Logf("metrics: %v  bytes %v  events %v", mm, dec.Len(), testsCount)
   238  }
   239  
   240  func BenchmarkLogCompressOneline(b *testing.B) {
   241  	b.ReportAllocs()
   242  
   243  	var full, small tlio.CountingIODiscard
   244  	w := NewEncoder(&small, 128*1024)
   245  
   246  	l := tlog.New(io.MultiWriter(&full, w))
   247  	tr := l.Start("span_name")
   248  
   249  	types := []string{"type_a", "value_b", "qweqew", "asdads"}
   250  
   251  	for i := 0; i < b.N; i++ {
   252  		//	tr := l.Start("span_name")
   253  		tr.Printw("some example message", "i", i, "type", types[i%len(types)])
   254  		//	tr.Finish()
   255  	}
   256  
   257  	b.SetBytes(full.Bytes.Load() / int64(b.N))
   258  	b.ReportMetric(float64(full.Bytes.Load())/float64(small.Bytes.Load()), "ratio")
   259  }
   260  
   261  func BenchmarkLogCompressOnelineText(b *testing.B) {
   262  	b.ReportAllocs()
   263  
   264  	var full, small tlio.CountingIODiscard
   265  	w := NewEncoder(&small, 128*1024)
   266  	cw := tlog.NewConsoleWriter(io.MultiWriter(&full, w), tlog.LstdFlags)
   267  
   268  	l := tlog.New(cw)
   269  	tr := l.Start("span_name")
   270  
   271  	types := []string{"type_a", "value_b", "qweqew", "asdads"}
   272  
   273  	for i := 0; i < b.N; i++ {
   274  		//	tr := l.Start("span_name")
   275  		tr.Printw("some example message", "i", i, "type", types[i%len(types)])
   276  		//	tr.Finish()
   277  	}
   278  
   279  	b.SetBytes(full.Bytes.Load() / int64(b.N))
   280  	b.ReportMetric(float64(full.Bytes.Load())/float64(small.Bytes.Load()), "ratio")
   281  }
   282  
   283  const BlockSize, HTSize = 1024 * 1024, 16 * 1024
   284  
   285  func BenchmarkEncodeFile(b *testing.B) {
   286  	err := loadTestFile(b, *fileFlag)
   287  	if err != nil {
   288  		b.Skipf("loading data: %v", err)
   289  	}
   290  
   291  	b.ReportAllocs()
   292  	b.ResetTimer()
   293  
   294  	var c tlio.CountingIODiscard
   295  	w := NewEncoderHTSize(&c, BlockSize, HTSize)
   296  
   297  	//	b.Logf("block %x  ht %x (%x * %x)", len(w.block), len(w.ht)*int(unsafe.Sizeof(w.ht[0])), len(w.ht), unsafe.Sizeof(w.ht[0]))
   298  
   299  	written := 0
   300  	for i := 0; i < b.N; i++ {
   301  		j := i % testsCount
   302  		msg := testData[testOff[j]:testOff[j+1]]
   303  
   304  		n, err := w.Write(msg)
   305  		if err != nil {
   306  			b.Fatalf("write: %v", err)
   307  		}
   308  		if n != len(msg) {
   309  			b.Fatalf("write %v of %v", n, len(msg))
   310  		}
   311  
   312  		written += n
   313  	}
   314  
   315  	//	b.Logf("total written: %x  %x", w.pos, w.pos/len(w.block))
   316  
   317  	b.ReportMetric(float64(written)/float64(c.Bytes.Load()), "ratio")
   318  	//	b.ReportMetric(float64(c.Operations)/float64(b.N), "writes/op")
   319  	b.SetBytes(int64(written / b.N))
   320  }
   321  
   322  func BenchmarkDecodeFile(b *testing.B) {
   323  	err := loadTestFile(b, *fileFlag)
   324  	if err != nil {
   325  		b.Skipf("loading data: %v", err)
   326  	}
   327  
   328  	encoded := make(low.Buf, 0, len(testData)/2)
   329  	w := NewEncoderHTSize(&encoded, BlockSize, HTSize)
   330  
   331  	const limit = 20000
   332  
   333  	written := 0
   334  	for i := 0; i < testsCount && i < limit; i++ {
   335  		j := i % testsCount
   336  		msg := testData[testOff[j]:testOff[j+1]]
   337  
   338  		n, err := w.Write(msg)
   339  		if err != nil {
   340  			b.Fatalf("write: %v", err)
   341  		}
   342  		if n != len(msg) {
   343  			b.Fatalf("write %v of %v", n, len(msg))
   344  		}
   345  
   346  		written += n
   347  	}
   348  
   349  	b.ReportAllocs()
   350  	b.ResetTimer()
   351  
   352  	b.ReportMetric(float64(written)/float64(len(encoded)), "ratio")
   353  
   354  	//	var decoded []byte
   355  	decoded := make(low.Buf, 0, len(testData))
   356  	buf := make([]byte, 4096)
   357  	r := NewDecoderBytes(encoded)
   358  
   359  	for i := 0; i < b.N/testsCount; i++ {
   360  		r.ResetBytes(encoded)
   361  		decoded = decoded[:0]
   362  
   363  		_, err = io.CopyBuffer(&decoded, r, buf)
   364  		assert.NoError(b, err)
   365  	}
   366  
   367  	//	b.Logf("decoded %x", len(decoded))
   368  
   369  	b.SetBytes(int64(decoded.Len() / testsCount))
   370  
   371  	min := len(testData)
   372  	if min > decoded.Len() {
   373  		min = decoded.Len()
   374  	}
   375  	assert.Equal(b, testData[:min], decoded.Bytes())
   376  }
   377  
   378  func loadTestFile(tb testing.TB, f string) (err error) {
   379  	tb.Helper()
   380  
   381  	if testData != nil {
   382  		return
   383  	}
   384  
   385  	testData, err = ioutil.ReadFile(f)
   386  	if err != nil {
   387  		return errors.Wrap(err, "open data file")
   388  	}
   389  
   390  	var d tlwire.Decoder
   391  	testOff = make([]int, 0, len(testData)/100)
   392  
   393  	var st int
   394  	for st < len(testData) {
   395  		testOff = append(testOff, st)
   396  		st = d.Skip(testData, st)
   397  	}
   398  	testsCount = len(testOff)
   399  	testOff = append(testOff, st)
   400  
   401  	tb.Logf("events loaded: %v", testsCount)
   402  
   403  	return
   404  }
   405  
   406  func FuzzEncoder(f *testing.F) {
   407  	f.Add(
   408  		[]byte("prefix_1234_suffix"),
   409  		[]byte("prefix_567_suffix"),
   410  		[]byte("suffix_prefix"),
   411  	)
   412  
   413  	f.Add(
   414  		[]byte("aaaaaa"),
   415  		[]byte("aaaaaaaaaaaa"),
   416  		[]byte("aaaaaaaaaaaaaaaaaaaaaaaa"),
   417  	)
   418  
   419  	f.Add(
   420  		[]byte("aaaaab"),
   421  		[]byte("aaaaabaaaaaa"),
   422  		[]byte("aaaaaaaaaaabaaaaaaaaaaaa"),
   423  	)
   424  
   425  	var ebuf, dbuf bytes.Buffer
   426  	buf := make([]byte, 16)
   427  
   428  	e := NewEncoderHTSize(&ebuf, 512, 32)
   429  	d := NewDecoder(&dbuf)
   430  
   431  	f.Fuzz(func(t *testing.T, p0, p1, p2 []byte) {
   432  		e.Reset(e.Writer)
   433  		ebuf.Reset()
   434  
   435  		for _, p := range [][]byte{p0, p1, p2} {
   436  			n, err := e.Write(p)
   437  			assert.NoError(t, err)
   438  			assert.Equal(t, len(p), n)
   439  		}
   440  
   441  		d.ResetBytes(ebuf.Bytes())
   442  		dbuf.Reset()
   443  
   444  		m, err := io.CopyBuffer(&dbuf, d, buf)
   445  		assert.NoError(t, err)
   446  		assert.Equal(t, len(p0)+len(p1)+len(p2), int(m))
   447  
   448  		i := 0
   449  		for _, p := range [][]byte{p0, p1, p2} {
   450  			assert.Equal(t, p, dbuf.Bytes()[i:i+len(p)])
   451  			i += len(p)
   452  		}
   453  
   454  		assert.Equal(t, int(m), i)
   455  
   456  		if !t.Failed() {
   457  			return
   458  		}
   459  
   460  		for i, p := range [][]byte{p0, p1, p2} {
   461  			t.Logf("p%d\n%s", i, hex.Dump(p))
   462  		}
   463  
   464  		t.Logf("encoded dump\n%s", Dump(ebuf.Bytes()))
   465  	})
   466  }