github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/compress/compress_test.go (about) 1 package compress_test 2 3 import ( 4 "bytes" 5 _ "embed" 6 "fmt" 7 "io" 8 "testing" 9 10 "github.com/parquet-go/parquet-go/compress" 11 "github.com/parquet-go/parquet-go/compress/brotli" 12 "github.com/parquet-go/parquet-go/compress/gzip" 13 "github.com/parquet-go/parquet-go/compress/lz4" 14 "github.com/parquet-go/parquet-go/compress/snappy" 15 "github.com/parquet-go/parquet-go/compress/uncompressed" 16 "github.com/parquet-go/parquet-go/compress/zstd" 17 ) 18 19 var tests = [...]struct { 20 scenario string 21 codec compress.Codec 22 }{ 23 { 24 scenario: "uncompressed", 25 codec: new(uncompressed.Codec), 26 }, 27 28 { 29 scenario: "snappy", 30 codec: new(snappy.Codec), 31 }, 32 33 { 34 scenario: "gzip", 35 codec: new(gzip.Codec), 36 }, 37 38 { 39 scenario: "brotli", 40 codec: new(brotli.Codec), 41 }, 42 43 { 44 scenario: "zstd", 45 codec: new(zstd.Codec), 46 }, 47 48 { 49 scenario: "lz4-fastest", 50 codec: &lz4.Codec{Level: lz4.Fastest}, 51 }, 52 { 53 scenario: "lz4-fast", 54 codec: &lz4.Codec{Level: lz4.Fast}, 55 }, 56 { 57 scenario: "lz4-l1", 58 codec: &lz4.Codec{Level: lz4.Level1}, 59 }, 60 { 61 scenario: "lz4-l5", 62 codec: &lz4.Codec{Level: lz4.Level5}, 63 }, 64 { 65 scenario: "lz4-l9", 66 codec: &lz4.Codec{Level: lz4.Level9}, 67 }, 68 } 69 70 var ( 71 testdata = bytes.Repeat([]byte("1234567890qwertyuiopasdfghjklzxcvbnm"), 10e3) 72 //go:embed testdata/e.txt 73 testdataE []byte 74 //go:embed testdata/gettysburg.txt 75 testdataGettysburg []byte 76 //go:embed testdata/html.txt 77 testdataHTML []byte 78 //go:embed testdata/Mark.Twain-Tom.Sawyer.txt 79 testdataTomSawyer []byte 80 //go:embed testdata/pi.txt 81 testdataPi []byte 82 //go:embed testdata/pngdata.bin 83 testdataPNGData []byte 84 ) 85 86 func TestCompressionCodec(t *testing.T) { 87 buffer := make([]byte, 0, len(testdata)) 88 output := make([]byte, 0, len(testdata)) 89 90 for _, test := range tests { 91 t.Run(test.scenario, func(t *testing.T) { 92 const N = 10 93 // Run the test multiple times to exercise codecs that maintain 94 // state across compression/decompression. 95 for i := 0; i < N; i++ { 96 var err error 97 98 buffer, err = test.codec.Encode(buffer[:0], testdata) 99 if err != nil { 100 t.Fatal(err) 101 } 102 103 output, err = test.codec.Decode(output[:0], buffer) 104 if err != nil { 105 t.Fatal(err) 106 } 107 108 if !bytes.Equal(testdata, output) { 109 t.Errorf("content mismatch after compressing and decompressing (attempt %d/%d)", i+1, N) 110 } 111 } 112 }) 113 } 114 } 115 116 func BenchmarkEncode(b *testing.B) { 117 buffer := make([]byte, 0, len(testdata)) 118 119 for testdataName, testdataBytes := range map[string][]byte{ 120 "e": testdataE, 121 "gettysburg": testdataGettysburg, 122 "html": testdataHTML, 123 "tom-sawyer": testdataTomSawyer, 124 "pi": testdataPi, 125 "png": testdataPNGData, 126 } { 127 for _, test := range tests { 128 testName := fmt.Sprintf("%s-%s", test.scenario, testdataName) 129 130 buffer, _ = test.codec.Encode(buffer[:0], testdataBytes) 131 b.Logf("%s | Compression ratio: %.2f%%", testName, float64(len(buffer))/float64(len(testdataBytes))*100) 132 133 b.Run(testName, func(b *testing.B) { 134 b.SetBytes(int64(len(testdataBytes))) 135 benchmarkZeroAllocsPerRun(b, func() { 136 buffer, _ = test.codec.Encode(buffer[:0], testdataBytes) 137 }) 138 }) 139 } 140 } 141 } 142 143 func BenchmarkDecode(b *testing.B) { 144 buffer := make([]byte, 0, len(testdata)) 145 output := make([]byte, 0, len(testdata)) 146 147 for _, test := range tests { 148 b.Run(test.scenario, func(b *testing.B) { 149 buffer, _ = test.codec.Encode(buffer[:0], testdata) 150 b.SetBytes(int64(len(testdata))) 151 benchmarkZeroAllocsPerRun(b, func() { 152 output, _ = test.codec.Encode(output[:0], buffer) 153 }) 154 }) 155 } 156 } 157 158 type simpleReader struct{ io.Reader } 159 160 func (s *simpleReader) Close() error { return nil } 161 func (s *simpleReader) Reset(r io.Reader) error { s.Reader = r; return nil } 162 163 type simpleWriter struct{ io.Writer } 164 165 func (s *simpleWriter) Close() error { return nil } 166 func (s *simpleWriter) Reset(w io.Writer) { s.Writer = w } 167 168 func BenchmarkCompressor(b *testing.B) { 169 compressor := compress.Compressor{} 170 src := make([]byte, 1000) 171 dst := make([]byte, 1000) 172 173 benchmarkZeroAllocsPerRun(b, func() { 174 dst, _ = compressor.Encode(dst, src, func(w io.Writer) (compress.Writer, error) { 175 return &simpleWriter{Writer: w}, nil 176 }) 177 }) 178 } 179 180 func BenchmarkDecompressor(b *testing.B) { 181 decompressor := compress.Decompressor{} 182 src := make([]byte, 1000) 183 dst := make([]byte, 1000) 184 185 benchmarkZeroAllocsPerRun(b, func() { 186 dst, _ = decompressor.Decode(dst, src, func(r io.Reader) (compress.Reader, error) { 187 return &simpleReader{Reader: r}, nil 188 }) 189 }) 190 } 191 192 func benchmarkZeroAllocsPerRun(b *testing.B, f func()) { 193 if allocs := testing.AllocsPerRun(b.N, f); allocs != 0 && !testing.Short() { 194 b.Errorf("too many memory allocations: %g > 0", allocs) 195 } 196 }