vitess.io/vitess@v0.16.2/go/vt/mysqlctl/compression_benchmark_test.go (about)

     1  package mysqlctl
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"crypto/md5"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"net/http"
    11  	"net/url"
    12  	"os"
    13  	"path"
    14  	"strconv"
    15  	"strings"
    16  	"testing"
    17  	"time"
    18  
    19  	"github.com/klauspost/compress/zstd"
    20  	"github.com/stretchr/testify/require"
    21  
    22  	"vitess.io/vitess/go/vt/logutil"
    23  )
    24  
    25  type (
    26  	benchmarkCompressArgs struct {
    27  		b        *testing.B
    28  		builtin  string
    29  		external string
    30  	}
    31  
    32  	benchmarkCompressEnv struct {
    33  		benchmarkCompressArgs
    34  	}
    35  
    36  	fnReadCloser struct {
    37  		io.Reader
    38  		closer func() error
    39  	}
    40  
    41  	meteredReader struct {
    42  		count int64
    43  		r     io.Reader
    44  	}
    45  
    46  	meteredWriter struct {
    47  		count int64
    48  		w     io.Writer
    49  	}
    50  
    51  	timedWriter struct {
    52  		duration time.Duration
    53  		w        io.Writer
    54  	}
    55  )
    56  
    57  const (
    58  	// This is the default file which will be downloaded, decompressed, and
    59  	// used by the compression benchmarks in this suite. It's a ~1.5 GiB
    60  	// compressed tar file containing 3 InnoDB files. The InnoDB files were
    61  	// built from this Wikipedia dataset:
    62  	//
    63  	//     https://dumps.wikimedia.org/archive/enwiki/20080103/enwiki-20080103-pages-articles.xml.bz2
    64  	defaultDataURL = "https://github.com/vitessio/vitess-resources/releases/download/testdata-v1.0/enwiki-20080103-pages-articles.ibd.tar.zst"
    65  
    66  	// By default, don't limit how many bytes we input into compression.
    67  	defaultMaxBytes int64 = 0
    68  
    69  	// By default the benchmarks will remove any downloaded data after all
    70  	// benchmarks are run, unless the data URL is a local path, in which case
    71  	// it will be left alone.
    72  	//
    73  	// Users may override this behavior. This option is
    74  	// intended purely for debugging purposes.
    75  	//
    76  	//     export VT_MYSQLCTL_COMPRESSION_BENCHMARK_CLEANUP=false
    77  	envVarCleanup = "VT_MYSQLCTL_COMPRESSION_BENCHMARK_CLEANUP"
    78  
    79  	// Users may specify an alternate gzipped URL. This option is intended
    80  	// purely for development and debugging purposes. For example:
    81  	//
    82  	//     export VT_MYSQLCTL_COMPRESSION_BENCHMARK_DATA_URL=https://wiki.mozilla.org/images/f/ff/Example.json.gz
    83  	//
    84  	// A local path can also be specified:
    85  	//
    86  	//     export VT_MYSQLCTL_COMPRESSION_BENCHMARK_DATA_URL=file:///tmp/custom.dat
    87  	envVarDataURL = "VT_MYSQLCTL_COMPRESSION_BENCHMARK_DATA_URL"
    88  
    89  	// Users may override how many bytes are downloaded. This option is
    90  	// intended purely for development and debugging purposes. For example:
    91  	//
    92  	//     export VT_MYSQLCTL_COMPRESSION_BENCHMARK_MAX_BYTES=256
    93  	envVarMaxBytes = "VT_MYSQLCTL_COMPRESSION_BENCHMARK_MAX_BYTES"
    94  )
    95  
    96  func (frc *fnReadCloser) Close() error {
    97  	return frc.closer()
    98  }
    99  
   100  func dataLocalPath(u *url.URL) string {
   101  	if isLocal(u) {
   102  		return u.Path
   103  	}
   104  	// Compute a local path for a file by hashing the URL.
   105  	return path.Join(os.TempDir(), fmt.Sprintf("%x.dat", md5.Sum([]byte(u.String()))))
   106  }
   107  
   108  func dataURL() (*url.URL, error) {
   109  	u := defaultDataURL
   110  
   111  	// Use user-defined URL, if specified.
   112  	if udURL := os.Getenv(envVarDataURL); udURL != "" {
   113  		u = udURL
   114  	}
   115  
   116  	return url.Parse(u)
   117  }
   118  
   119  func downloadData(url, localPath string, maxBytes int64) error {
   120  	var err error
   121  	var rdr io.Reader
   122  
   123  	// If the local path does not exist, download the file from the URL.
   124  	httpClient := http.Client{
   125  		CheckRedirect: func(r *http.Request, via []*http.Request) error {
   126  			r.URL.Opaque = r.URL.Path
   127  			return nil
   128  		},
   129  	}
   130  
   131  	resp, err := httpClient.Get(url)
   132  	if err != nil {
   133  		return fmt.Errorf("failed to get data at URL %q: %v", url, err)
   134  	}
   135  	defer resp.Body.Close()
   136  	rdr = resp.Body
   137  
   138  	// Assume the data we're downloading is compressed with zstd.
   139  	zr, err := zstd.NewReader(rdr)
   140  	if err != nil {
   141  		return fmt.Errorf("failed to decompress data at URL %q: %v", url, err)
   142  	}
   143  	defer zr.Close()
   144  	rdr = zr
   145  
   146  	if maxBytes > 0 {
   147  		rdr = io.LimitReader(rdr, maxBytes)
   148  	}
   149  
   150  	// Create a local file to write the HTTP response to.
   151  	file, err := os.OpenFile(localPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0666)
   152  	if err != nil {
   153  		return err
   154  	}
   155  	defer file.Close()
   156  
   157  	// Write the decompressed data to local path.
   158  	if _, err := io.Copy(file, rdr); err != nil {
   159  		return err
   160  	}
   161  
   162  	return nil
   163  }
   164  
   165  func isHTTP(u *url.URL) bool {
   166  	return u.Scheme == "http" || u.Scheme == "https"
   167  }
   168  
   169  func isLocal(u *url.URL) bool {
   170  	return u.Scheme == "file" || (u.Scheme == "" && u.Hostname() == "")
   171  }
   172  
   173  func maxBytes() (int64, error) {
   174  	// Limit how many bytes we unpack from the archive.
   175  	mb := defaultMaxBytes
   176  
   177  	// Use user-defined max bytes, if specified and valid.
   178  	if udMaxBytes := os.Getenv(envVarMaxBytes); udMaxBytes != "" {
   179  		udmb, err := strconv.ParseInt(udMaxBytes, 10, 64)
   180  		if err != nil {
   181  			return mb, err
   182  		}
   183  		mb = udmb
   184  	}
   185  
   186  	return mb, nil
   187  }
   188  
   189  func newBenchmarkCompressEnv(args benchmarkCompressArgs) benchmarkCompressEnv {
   190  	bce := benchmarkCompressEnv{
   191  		benchmarkCompressArgs: args,
   192  	}
   193  	bce.validate()
   194  	bce.prepare()
   195  	return bce
   196  }
   197  
   198  func shouldCleanup(u *url.URL) (bool, error) {
   199  	c := true
   200  
   201  	// Don't cleanup local paths provided by the user.
   202  	if isLocal(u) {
   203  		c = false
   204  	}
   205  
   206  	// Use user-defined cleanup, if specified and valid.
   207  	if udCleanup := os.Getenv(envVarCleanup); udCleanup != "" {
   208  		udc, err := strconv.ParseBool(udCleanup)
   209  		if err != nil {
   210  			return c, err
   211  		}
   212  		c = udc
   213  	}
   214  
   215  	return c, nil
   216  }
   217  
   218  func (bce *benchmarkCompressEnv) compress() {
   219  	var durCompressed time.Duration
   220  	var numUncompressedBytes, numCompressedBytes int64
   221  
   222  	// The Benchmark, Reader and Writer interfaces make it difficult to time
   223  	// compression without frequent calls to {Start,Stop}Timer or including
   224  	// disk read/write times the measurement. Instead we'll use ReportMetric
   225  	// after all loops are completed.
   226  	bce.b.StopTimer()
   227  	bce.b.ResetTimer()
   228  
   229  	for i := 0; i < bce.b.N; i++ {
   230  		logger := logutil.NewMemoryLogger()
   231  
   232  		// Don't write anywhere. We're just interested in compression time.
   233  		w := io.Discard
   234  
   235  		// Keep track of how many compressed bytes come through.
   236  		mw := &meteredWriter{w: w}
   237  
   238  		// Create compressor.
   239  		c := bce.compressor(logger, mw)
   240  
   241  		// Time how long we spend on c.Write.
   242  		tc := &timedWriter{w: c}
   243  
   244  		r, err := bce.reader()
   245  		require.Nil(bce.b, err, "Failed to get data reader.")
   246  
   247  		// Track how many bytes we read.
   248  		mr := &meteredReader{r: r}
   249  
   250  		// It makes sense to use {Start,Stop}Timer here, but we're not
   251  		// interested in how long it takes to read from disk.
   252  		_, err = io.Copy(tc, mr)
   253  
   254  		// Don't defer closing things, otherwise we can exhaust open file limit.
   255  		r.Close()
   256  		c.Close()
   257  
   258  		require.Nil(bce.b, err, logger.Events)
   259  
   260  		// Record how many bytes compressed so we can report these later.
   261  		durCompressed += tc.duration
   262  		numCompressedBytes += mw.count
   263  		numUncompressedBytes += mr.count
   264  	}
   265  
   266  	bce.b.ReportMetric(
   267  		float64(durCompressed.Nanoseconds()/int64(bce.b.N)),
   268  		"ns/op",
   269  	)
   270  
   271  	mbOut := numUncompressedBytes / 1024 / 1024
   272  	bce.b.ReportMetric(
   273  		float64(mbOut)/durCompressed.Seconds(),
   274  		"MB/s",
   275  	)
   276  
   277  	bce.b.ReportMetric(
   278  		float64(numUncompressedBytes)/float64(numCompressedBytes),
   279  		"compression-ratio",
   280  	)
   281  }
   282  
   283  func (bce *benchmarkCompressEnv) compressor(logger logutil.Logger, writer io.Writer) io.WriteCloser {
   284  	var compressor io.WriteCloser
   285  	var err error
   286  
   287  	if bce.builtin != "" {
   288  		compressor, err = newBuiltinCompressor(bce.builtin, writer, logger)
   289  	} else if bce.external != "" {
   290  		compressor, err = newExternalCompressor(context.Background(), bce.external, writer, logger)
   291  	}
   292  
   293  	require.Nil(bce.b, err, "failed to create compressor.")
   294  	return compressor
   295  }
   296  
   297  func (bce *benchmarkCompressEnv) prepare() {
   298  	u, err := dataURL()
   299  	require.NoError(bce.b, err, "failed to get data url")
   300  
   301  	localPath := dataLocalPath(u)
   302  
   303  	if isLocal(u) {
   304  		if _, err := os.Stat(localPath); errors.Is(err, os.ErrNotExist) {
   305  			require.Failf(bce.b, "local path does not exist", localPath)
   306  		}
   307  	} else if isHTTP(u) {
   308  		if _, err := os.Stat(localPath); errors.Is(err, os.ErrNotExist) {
   309  			mb, _ := maxBytes()
   310  			bce.b.Logf("downloading data from %s", u.String())
   311  			if err := downloadData(u.String(), localPath, mb); err != nil {
   312  				require.Failf(bce.b, "failed to download data", err.Error())
   313  			}
   314  		}
   315  	} else {
   316  		require.Failf(bce.b, "don't know how to get data from url", u.String())
   317  	}
   318  }
   319  
   320  func (bce *benchmarkCompressEnv) reader() (io.ReadCloser, error) {
   321  	var r io.Reader
   322  
   323  	u, _ := dataURL()
   324  
   325  	f, err := os.Open(dataLocalPath(u))
   326  	if err != nil {
   327  		return nil, err
   328  	}
   329  	r = f
   330  
   331  	mb, _ := maxBytes()
   332  	if mb > 0 {
   333  		r = io.LimitReader(f, mb)
   334  	}
   335  
   336  	buf := bufio.NewReaderSize(r, 2*1024*1024)
   337  	return &fnReadCloser{buf, f.Close}, nil
   338  }
   339  
   340  func (bce *benchmarkCompressEnv) validate() {
   341  	if bce.external != "" {
   342  		cmdArgs := strings.Split(bce.external, " ")
   343  
   344  		_, err := validateExternalCmd(cmdArgs[0])
   345  		if err != nil {
   346  			bce.b.Skipf("command %q not available in this host: %v; skipping...", cmdArgs[0], err)
   347  		}
   348  	}
   349  
   350  	if bce.builtin == "" && bce.external == "" {
   351  		require.Fail(bce.b, "either builtin or external compressor must be specified.")
   352  	}
   353  }
   354  
   355  func (mr *meteredReader) Read(p []byte) (nbytes int, err error) {
   356  	nbytes, err = mr.r.Read(p)
   357  	mr.count += int64(nbytes)
   358  	return
   359  }
   360  
   361  func (mw *meteredWriter) Write(p []byte) (nbytes int, err error) {
   362  	nbytes, err = mw.w.Write(p)
   363  	mw.count += int64(nbytes)
   364  	return
   365  }
   366  
   367  func (tw *timedWriter) Write(p []byte) (nbytes int, err error) {
   368  	start := time.Now()
   369  	nbytes, err = tw.w.Write(p)
   370  	tw.duration += time.Since(start)
   371  	return
   372  }
   373  
   374  func TestMain(m *testing.M) {
   375  	code := m.Run()
   376  
   377  	u, _ := dataURL()
   378  	localPath := dataLocalPath(u)
   379  
   380  	cleanup, err := shouldCleanup(u)
   381  	if cleanup {
   382  		msg := "cleaning up %q"
   383  		args := []any{localPath}
   384  
   385  		if err != nil {
   386  			args = append(args, err)
   387  			msg = msg + "; %v"
   388  		}
   389  
   390  		fmt.Printf(msg+"\n", args...)
   391  		if _, err := os.Stat(localPath); !errors.Is(err, os.ErrNotExist) {
   392  			os.Remove(localPath)
   393  		}
   394  	}
   395  
   396  	os.Exit(code)
   397  }
   398  
   399  func BenchmarkCompressLz4Builtin(b *testing.B) {
   400  	env := newBenchmarkCompressEnv(benchmarkCompressArgs{
   401  		b:       b,
   402  		builtin: Lz4Compressor,
   403  	})
   404  	env.compress()
   405  }
   406  
   407  func BenchmarkCompressPargzipBuiltin(b *testing.B) {
   408  	env := newBenchmarkCompressEnv(benchmarkCompressArgs{
   409  		b:       b,
   410  		builtin: PargzipCompressor,
   411  	})
   412  	env.compress()
   413  }
   414  
   415  func BenchmarkCompressPgzipBuiltin(b *testing.B) {
   416  	env := newBenchmarkCompressEnv(benchmarkCompressArgs{
   417  		b:       b,
   418  		builtin: PgzipCompressor,
   419  	})
   420  	env.compress()
   421  }
   422  
   423  func BenchmarkCompressZstdBuiltin(b *testing.B) {
   424  	env := newBenchmarkCompressEnv(benchmarkCompressArgs{
   425  		b:       b,
   426  		builtin: ZstdCompressor,
   427  	})
   428  	env.compress()
   429  }
   430  
   431  func BenchmarkCompressZstdExternal(b *testing.B) {
   432  	env := newBenchmarkCompressEnv(benchmarkCompressArgs{
   433  		b:        b,
   434  		external: fmt.Sprintf("zstd -%d -c", compressionLevel),
   435  	})
   436  	env.compress()
   437  }
   438  
   439  func BenchmarkCompressZstdExternalFast4(b *testing.B) {
   440  	env := newBenchmarkCompressEnv(benchmarkCompressArgs{
   441  		b:        b,
   442  		external: fmt.Sprintf("zstd -%d --fast=4 -c", compressionLevel),
   443  	})
   444  	env.compress()
   445  }
   446  
   447  func BenchmarkCompressZstdExternalT0(b *testing.B) {
   448  	env := newBenchmarkCompressEnv(benchmarkCompressArgs{
   449  		b:        b,
   450  		external: fmt.Sprintf("zstd -%d -T0 -c", compressionLevel),
   451  	})
   452  	env.compress()
   453  }
   454  
   455  func BenchmarkCompressZstdExternalT4(b *testing.B) {
   456  	env := newBenchmarkCompressEnv(benchmarkCompressArgs{
   457  		b:        b,
   458  		external: fmt.Sprintf("zstd -%d -T4 -c", compressionLevel),
   459  	})
   460  	env.compress()
   461  }