gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/compressio/compressio_test.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package compressio

import (
	"bytes"
	"compress/flate"
	"encoding/base64"
	"fmt"
	"io"
	"math/rand"
	"runtime"
	"testing"
	"time"
)

// harness is the subset of testing.T and testing.B used by these tests,
// allowing doTest to drive both tests and benchmarks.
type harness interface {
	Errorf(format string, v ...any)
	Fatalf(format string, v ...any)
	Logf(format string, v ...any)
}

// initTest returns size bytes of synthetic, moderately compressible data.
func initTest(t harness, size int) []byte {
	// Use all CPUs; GOMAXPROCS bounds the number of goroutines that may
	// execute in parallel.
	runtime.GOMAXPROCS(runtime.NumCPU())

	// Construct synthetic data. We do this by encoding random data with
	// base64. This gives a high level of entropy, but still quite a bit of
	// structure, to give reasonable compression ratios (~75%).
	var buf bytes.Buffer
	bufW := base64.NewEncoder(base64.RawStdEncoding, &buf)
	bufR := rand.New(rand.NewSource(0))
	if _, err := io.CopyN(bufW, bufR, int64(size)); err != nil {
		t.Fatalf("unable to seed random data: %v", err)
	}
	return buf.Bytes()
}
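
// The ~75% figure above can be sanity-checked directly by flate-compressing
// the synthetic corpus and comparing sizes. This helper is an illustrative
// sketch only (hypothetical, not part of the original suite); the exact
// ratio depends on the compression level.
func measureFlateRatio(data []byte) (float64, error) {
	var out bytes.Buffer
	w, err := flate.NewWriter(&out, flate.BestSpeed)
	if err != nil {
		return 0, err
	}
	if _, err := w.Write(data); err != nil {
		return 0, err
	}
	if err := w.Close(); err != nil { // Close flushes remaining output.
		return 0, err
	}
	return float64(out.Len()) / float64(len(data)), nil
}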

// testOpts describes a single compress/decompress cycle for doTest.
type testOpts struct {
	Name            string
	Data            []byte
	NewWriter       func(*bytes.Buffer) (io.Writer, error)
	NewReader       func(*bytes.Buffer) (io.Reader, error)
	PreCompress     func()
	PostCompress    func()
	PreDecompress   func()
	PostDecompress  func()
	CompressIters   int  // Defaults to 1 if <= 0.
	DecompressIters int  // Defaults to 1 if <= 0.
	CorruptData     bool // Flip one byte of the compressed stream.
}
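
// A minimal configuration sketch (illustrative values, not one of the
// original cases): only Name, Data, NewWriter, and NewReader are required;
// iteration counts of zero default to a single pass in doTest below.
var _ = testOpts{
	Name: "example",
	Data: []byte("hello, world"),
	NewWriter: func(b *bytes.Buffer) (io.Writer, error) {
		return NewWriter(b, nil, 1024, flate.BestSpeed)
	},
	NewReader: func(b *bytes.Buffer) (io.Reader, error) {
		return NewReader(b, nil)
	},
}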

// doTest runs one compress/decompress cycle described by opts and verifies
// the round trip (or, with CorruptData, that corruption is detected).
func doTest(t harness, opts testOpts) {
	// Compress.
	var compressed bytes.Buffer
	compressionStartTime := time.Now()
	if opts.PreCompress != nil {
		opts.PreCompress()
	}
	if opts.CompressIters <= 0 {
		opts.CompressIters = 1
	}
	for i := 0; i < opts.CompressIters; i++ {
		compressed.Reset()
		w, err := opts.NewWriter(&compressed)
		if err != nil {
			t.Errorf("%s: NewWriter got err %v, expected nil", opts.Name, err)
			return
		}
		if _, err := io.Copy(w, bytes.NewBuffer(opts.Data)); err != nil {
			t.Errorf("%s: compress got err %v, expected nil", opts.Name, err)
			return
		}
		if closer, ok := w.(io.Closer); ok {
			if err := closer.Close(); err != nil {
				t.Errorf("%s: close got err %v, expected nil", opts.Name, err)
				return
			}
		}
	}
	if opts.PostCompress != nil {
		opts.PostCompress()
	}
	compressionTime := time.Since(compressionStartTime)
	compressionRatio := float32(compressed.Len()) / float32(len(opts.Data))

	if compressed.Len() == 0 {
		// Data can't be corrupted if there is no data.
		opts.CorruptData = false
	}

	// Decompress.
	var decompressed bytes.Buffer
	decompressionStartTime := time.Now()
	if opts.PreDecompress != nil {
		opts.PreDecompress()
	}
	if opts.DecompressIters <= 0 {
		opts.DecompressIters = 1
	}
	if opts.CorruptData {
		// Flip a single random byte of the compressed stream.
		b := compressed.Bytes()
		b[rand.Intn(len(b))]++
	}
	for i := 0; i < opts.DecompressIters; i++ {
		decompressed.Reset()
		r, err := opts.NewReader(bytes.NewBuffer(compressed.Bytes()))
		if err != nil {
			if opts.CorruptData {
				// Corruption may be detected as early as
				// reader construction.
				continue
			}
			t.Errorf("%s: NewReader got err %v, expected nil", opts.Name, err)
			return
		}
		// With corrupt data, the copy must fail; otherwise it must
		// succeed.
		if _, err := io.Copy(&decompressed, r); (err != nil) != opts.CorruptData {
			t.Errorf("%s: decompress got err %v, want error: %v", opts.Name, err, opts.CorruptData)
			return
		}
	}
	if opts.PostDecompress != nil {
		opts.PostDecompress()
	}
	decompressionTime := time.Since(decompressionStartTime)

	if opts.CorruptData {
		// Nothing more to verify; the decompressed contents are
		// unspecified.
		return
	}

	// Verify.
	if decompressed.Len() != len(opts.Data) {
		t.Errorf("%s: got %d bytes, expected %d", opts.Name, decompressed.Len(), len(opts.Data))
	}
	if !bytes.Equal(opts.Data, decompressed.Bytes()) {
		t.Errorf("%s: got mismatch, expected match", opts.Name)
		if len(opts.Data) < 32 { // Don't flood the logs.
			t.Errorf("got %v, expected %v", decompressed.Bytes(), opts.Data)
		}
	}

	t.Logf("%s: compression time %v, ratio %2.2f, decompression time %v",
		opts.Name, compressionTime, compressionRatio, decompressionTime)
}
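
// exampleRoundTrip is a minimal sketch of the API exercised above
// (hypothetical helper, not part of the original suite): NewWriter
// compresses in blockSize chunks, optionally keyed for integrity hashing,
// and NewReader reverses it with the same key.
func exampleRoundTrip(data, key []byte, blockSize uint32) error {
	var compressed bytes.Buffer
	w, err := NewWriter(&compressed, key, blockSize, flate.BestSpeed)
	if err != nil {
		return err
	}
	if _, err := w.Write(data); err != nil {
		return err
	}
	if err := w.Close(); err != nil { // Flush the final block.
		return err
	}
	r, err := NewReader(bytes.NewBuffer(compressed.Bytes()), key)
	if err != nil {
		return err
	}
	var decompressed bytes.Buffer
	if _, err := io.Copy(&decompressed, r); err != nil {
		return err
	}
	if !bytes.Equal(data, decompressed.Bytes()) {
		return fmt.Errorf("round trip mismatch")
	}
	return nil
}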

// hashKey is a 32-byte key enabling the keyed-hash (integrity-checking)
// mode of NewWriter/NewReader.
var hashKey = []byte("01234567890123456789012345678901")

func TestCompress(t *testing.T) {
	// Seed the global source used to pick corruption offsets in doTest.
	rand.Seed(time.Now().Unix())

	var (
		data  = initTest(t, 10*1024*1024)
		data0 = data[:0]
		data1 = data[:1]
		data2 = data[:11]
		data3 = data[:16]
		data4 = data[:]
	)

	for _, data := range [][]byte{data0, data1, data2, data3, data4} {
		for _, blockSize := range []uint32{1, 4, 1024, 4 * 1024, 16 * 1024} {
			// Skip slow cases: tiny block sizes over non-trivial
			// data simply take too long.
			if blockSize <= 16 && len(data) > 16 {
				continue
			}

			for _, key := range [][]byte{nil, hashKey} {
				for _, corruptData := range []bool{false, true} {
					if key == nil && corruptData {
						// No need to test the corrupt
						// data case when not hashing.
						continue
					}
					// Do the compress test.
					doTest(t, testOpts{
						Name: fmt.Sprintf("len(data)=%d, blockSize=%d, key=%s, corruptData=%v", len(data), blockSize, string(key), corruptData),
						Data: data,
						NewWriter: func(b *bytes.Buffer) (io.Writer, error) {
							return NewWriter(b, key, blockSize, flate.BestSpeed)
						},
						NewReader: func(b *bytes.Buffer) (io.Reader, error) {
							return NewReader(b, key)
						},
						CorruptData: corruptData,
					})
				}
			}
		}

		// Do the vanilla flate test as a baseline for comparison.
		doTest(t, testOpts{
			Name: fmt.Sprintf("len(data)=%d, vanilla flate", len(data)),
			Data: data,
			NewWriter: func(b *bytes.Buffer) (io.Writer, error) {
				return flate.NewWriter(b, flate.BestSpeed)
			},
			NewReader: func(b *bytes.Buffer) (io.Reader, error) {
				return flate.NewReader(b), nil
			},
		})
	}
}
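
// A sketch of the corruption property TestCompress exercises (hypothetical
// helper, not part of the original suite): with a key, flipping any byte of
// the compressed stream must surface as an error, either when the reader is
// constructed or during the copy.
func exampleCorruptionDetected(data []byte) error {
	var compressed bytes.Buffer
	w, err := NewWriter(&compressed, hashKey, 4*1024, flate.BestSpeed)
	if err != nil {
		return err
	}
	if _, err := w.Write(data); err != nil {
		return err
	}
	if err := w.Close(); err != nil {
		return err
	}
	if compressed.Len() == 0 {
		return nil // Nothing to corrupt.
	}
	b := compressed.Bytes()
	b[rand.Intn(len(b))]++ // Corrupt one byte, as doTest does.
	r, err := NewReader(bytes.NewBuffer(b), hashKey)
	if err != nil {
		return nil // Detected at construction.
	}
	if _, err := io.Copy(io.Discard, r); err != nil {
		return nil // Detected during decompression.
	}
	return fmt.Errorf("corruption went undetected")
}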

const (
	benchDataSize = 600 * 1024 * 1024
)

// benchmark measures one direction (compress or decompress) of a full pass
// over benchDataSize bytes; the timer runs only for the measured half.
func benchmark(b *testing.B, compress bool, hash bool, blockSize uint32) {
	b.StopTimer()
	b.SetBytes(benchDataSize)
	data := initTest(b, benchDataSize)
	key := hashKey
	if !hash {
		key = nil
	}
	opts := testOpts{
		Name: fmt.Sprintf("compress=%t, hash=%t, len(data)=%d, blockSize=%d", compress, hash, len(data), blockSize),
		Data: data,
		NewWriter: func(b *bytes.Buffer) (io.Writer, error) {
			return NewWriter(b, key, blockSize, flate.BestSpeed)
		},
		NewReader: func(b *bytes.Buffer) (io.Reader, error) {
			return NewReader(b, key)
		},
	}
	if compress {
		opts.PreCompress = b.StartTimer
		opts.PostCompress = b.StopTimer
		opts.CompressIters = b.N
	} else {
		opts.PreDecompress = b.StartTimer
		opts.PostDecompress = b.StopTimer
		opts.DecompressIters = b.N
	}
	doTest(b, opts)
}
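
// The benchmarks below can be run with the standard tooling, e.g.
// `go test -bench=. -run='^$'` in this package; because of b.SetBytes
// above, the framework reports throughput (MB/s) alongside ns/op.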

func BenchmarkCompressNoHash64K(b *testing.B) {
	benchmark(b, true, false, 64*1024)
}

func BenchmarkCompressHash64K(b *testing.B) {
	benchmark(b, true, true, 64*1024)
}

func BenchmarkDecompressNoHash64K(b *testing.B) {
	benchmark(b, false, false, 64*1024)
}

func BenchmarkDecompressHash64K(b *testing.B) {
	benchmark(b, false, true, 64*1024)
}

func BenchmarkCompressNoHash1M(b *testing.B) {
	benchmark(b, true, false, 1024*1024)
}

func BenchmarkCompressHash1M(b *testing.B) {
	benchmark(b, true, true, 1024*1024)
}

func BenchmarkDecompressNoHash1M(b *testing.B) {
	benchmark(b, false, false, 1024*1024)
}

func BenchmarkDecompressHash1M(b *testing.B) {
	benchmark(b, false, true, 1024*1024)
}

func BenchmarkCompressNoHash16M(b *testing.B) {
	benchmark(b, true, false, 16*1024*1024)
}

func BenchmarkCompressHash16M(b *testing.B) {
	benchmark(b, true, true, 16*1024*1024)
}

func BenchmarkDecompressNoHash16M(b *testing.B) {
	benchmark(b, false, false, 16*1024*1024)
}

func BenchmarkDecompressHash16M(b *testing.B) {
	benchmark(b, false, true, 16*1024*1024)
}