github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/compress/compress_test.go (about)

     1  package compress_test
     2  
     3  import (
     4  	"bytes"
     5  	_ "embed"
     6  	"fmt"
     7  	"io"
     8  	"testing"
     9  
    10  	"github.com/parquet-go/parquet-go/compress"
    11  	"github.com/parquet-go/parquet-go/compress/brotli"
    12  	"github.com/parquet-go/parquet-go/compress/gzip"
    13  	"github.com/parquet-go/parquet-go/compress/lz4"
    14  	"github.com/parquet-go/parquet-go/compress/snappy"
    15  	"github.com/parquet-go/parquet-go/compress/uncompressed"
    16  	"github.com/parquet-go/parquet-go/compress/zstd"
    17  )
    18  
    19  var tests = [...]struct {
    20  	scenario string
    21  	codec    compress.Codec
    22  }{
    23  	{
    24  		scenario: "uncompressed",
    25  		codec:    new(uncompressed.Codec),
    26  	},
    27  
    28  	{
    29  		scenario: "snappy",
    30  		codec:    new(snappy.Codec),
    31  	},
    32  
    33  	{
    34  		scenario: "gzip",
    35  		codec:    new(gzip.Codec),
    36  	},
    37  
    38  	{
    39  		scenario: "brotli",
    40  		codec:    new(brotli.Codec),
    41  	},
    42  
    43  	{
    44  		scenario: "zstd",
    45  		codec:    new(zstd.Codec),
    46  	},
    47  
    48  	{
    49  		scenario: "lz4-fastest",
    50  		codec:    &lz4.Codec{Level: lz4.Fastest},
    51  	},
    52  	{
    53  		scenario: "lz4-fast",
    54  		codec:    &lz4.Codec{Level: lz4.Fast},
    55  	},
    56  	{
    57  		scenario: "lz4-l1",
    58  		codec:    &lz4.Codec{Level: lz4.Level1},
    59  	},
    60  	{
    61  		scenario: "lz4-l5",
    62  		codec:    &lz4.Codec{Level: lz4.Level5},
    63  	},
    64  	{
    65  		scenario: "lz4-l9",
    66  		codec:    &lz4.Codec{Level: lz4.Level9},
    67  	},
    68  }
    69  
    70  var (
    71  	testdata = bytes.Repeat([]byte("1234567890qwertyuiopasdfghjklzxcvbnm"), 10e3)
    72  	//go:embed testdata/e.txt
    73  	testdataE []byte
    74  	//go:embed testdata/gettysburg.txt
    75  	testdataGettysburg []byte
    76  	//go:embed testdata/html.txt
    77  	testdataHTML []byte
    78  	//go:embed testdata/Mark.Twain-Tom.Sawyer.txt
    79  	testdataTomSawyer []byte
    80  	//go:embed testdata/pi.txt
    81  	testdataPi []byte
    82  	//go:embed testdata/pngdata.bin
    83  	testdataPNGData []byte
    84  )
    85  
    86  func TestCompressionCodec(t *testing.T) {
    87  	buffer := make([]byte, 0, len(testdata))
    88  	output := make([]byte, 0, len(testdata))
    89  
    90  	for _, test := range tests {
    91  		t.Run(test.scenario, func(t *testing.T) {
    92  			const N = 10
    93  			// Run the test multiple times to exercise codecs that maintain
    94  			// state across compression/decompression.
    95  			for i := 0; i < N; i++ {
    96  				var err error
    97  
    98  				buffer, err = test.codec.Encode(buffer[:0], testdata)
    99  				if err != nil {
   100  					t.Fatal(err)
   101  				}
   102  
   103  				output, err = test.codec.Decode(output[:0], buffer)
   104  				if err != nil {
   105  					t.Fatal(err)
   106  				}
   107  
   108  				if !bytes.Equal(testdata, output) {
   109  					t.Errorf("content mismatch after compressing and decompressing (attempt %d/%d)", i+1, N)
   110  				}
   111  			}
   112  		})
   113  	}
   114  }
   115  
   116  func BenchmarkEncode(b *testing.B) {
   117  	buffer := make([]byte, 0, len(testdata))
   118  
   119  	for testdataName, testdataBytes := range map[string][]byte{
   120  		"e":          testdataE,
   121  		"gettysburg": testdataGettysburg,
   122  		"html":       testdataHTML,
   123  		"tom-sawyer": testdataTomSawyer,
   124  		"pi":         testdataPi,
   125  		"png":        testdataPNGData,
   126  	} {
   127  		for _, test := range tests {
   128  			testName := fmt.Sprintf("%s-%s", test.scenario, testdataName)
   129  
   130  			buffer, _ = test.codec.Encode(buffer[:0], testdataBytes)
   131  			b.Logf("%s | Compression ratio: %.2f%%", testName, float64(len(buffer))/float64(len(testdataBytes))*100)
   132  
   133  			b.Run(testName, func(b *testing.B) {
   134  				b.SetBytes(int64(len(testdataBytes)))
   135  				benchmarkZeroAllocsPerRun(b, func() {
   136  					buffer, _ = test.codec.Encode(buffer[:0], testdataBytes)
   137  				})
   138  			})
   139  		}
   140  	}
   141  }
   142  
   143  func BenchmarkDecode(b *testing.B) {
   144  	buffer := make([]byte, 0, len(testdata))
   145  	output := make([]byte, 0, len(testdata))
   146  
   147  	for _, test := range tests {
   148  		b.Run(test.scenario, func(b *testing.B) {
   149  			buffer, _ = test.codec.Encode(buffer[:0], testdata)
   150  			b.SetBytes(int64(len(testdata)))
   151  			benchmarkZeroAllocsPerRun(b, func() {
   152  				output, _ = test.codec.Encode(output[:0], buffer)
   153  			})
   154  		})
   155  	}
   156  }
   157  
   158  type simpleReader struct{ io.Reader }
   159  
   160  func (s *simpleReader) Close() error            { return nil }
   161  func (s *simpleReader) Reset(r io.Reader) error { s.Reader = r; return nil }
   162  
   163  type simpleWriter struct{ io.Writer }
   164  
   165  func (s *simpleWriter) Close() error      { return nil }
   166  func (s *simpleWriter) Reset(w io.Writer) { s.Writer = w }
   167  
   168  func BenchmarkCompressor(b *testing.B) {
   169  	compressor := compress.Compressor{}
   170  	src := make([]byte, 1000)
   171  	dst := make([]byte, 1000)
   172  
   173  	benchmarkZeroAllocsPerRun(b, func() {
   174  		dst, _ = compressor.Encode(dst, src, func(w io.Writer) (compress.Writer, error) {
   175  			return &simpleWriter{Writer: w}, nil
   176  		})
   177  	})
   178  }
   179  
   180  func BenchmarkDecompressor(b *testing.B) {
   181  	decompressor := compress.Decompressor{}
   182  	src := make([]byte, 1000)
   183  	dst := make([]byte, 1000)
   184  
   185  	benchmarkZeroAllocsPerRun(b, func() {
   186  		dst, _ = decompressor.Decode(dst, src, func(r io.Reader) (compress.Reader, error) {
   187  			return &simpleReader{Reader: r}, nil
   188  		})
   189  	})
   190  }
   191  
   192  func benchmarkZeroAllocsPerRun(b *testing.B, f func()) {
   193  	if allocs := testing.AllocsPerRun(b.N, f); allocs != 0 && !testing.Short() {
   194  		b.Errorf("too many memory allocations: %g > 0", allocs)
   195  	}
   196  }