vitess.io/vitess@v0.16.2/go/vt/mysqlctl/compression.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package mysqlctl
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"os/exec"
    25  	"sync"
    26  
    27  	"github.com/google/shlex"
    28  	"github.com/klauspost/compress/zstd"
    29  	"github.com/klauspost/pgzip"
    30  	"github.com/pierrec/lz4"
    31  	"github.com/planetscale/pargzip"
    32  	"github.com/spf13/pflag"
    33  
    34  	"vitess.io/vitess/go/vt/logutil"
    35  	"vitess.io/vitess/go/vt/servenv"
    36  	"vitess.io/vitess/go/vt/vterrors"
    37  )
    38  
// Names of the compression engines known to this file. They are the values
// accepted by --compression-engine-name (see
// validateExternalCompressionEngineName).
const (
	PgzipCompressor    = "pgzip"    // built-in gzip engine backed by the pgzip package
	PargzipCompressor  = "pargzip"  // built-in gzip writer backed by pargzip; decompression falls back to pgzip
	ZstdCompressor     = "zstd"     // built-in zstd engine
	Lz4Compressor      = "lz4"      // built-in lz4 engine
	ExternalCompressor = "external" // compression is delegated to an external command
)
    46  
var (
	// compressionLevel is the level handed to the built-in compressors. It is
	// bound to the --compression-level flag.
	compressionLevel = 1
	// CompressionEngineName specifies which compressor/decompressor to use.
	// It is bound to the --compression-engine-name flag.
	CompressionEngineName = "pargzip"
	// ExternalCompressorCmd and ExternalDecompressorCmd hold the external
	// commands (with arguments) used to compress and decompress backups.
	// ExternalCompressorExt is the extension to use with an external
	// compressor. All three are bound to the --external-* flags.
	ExternalCompressorCmd   string
	ExternalCompressorExt   string
	ExternalDecompressorCmd string

	// Sentinel errors for unsupported engine names. errUnsupportedCompressionEngine
	// is wrapped with %w by the validation helpers in this file;
	// errUnsupportedDeCompressionEngine is not referenced in the visible part
	// of the file (presumably used by the restore path -- verify against callers).
	errUnsupportedDeCompressionEngine = errors.New("unsupported engine in MANIFEST. You need to provide --external-decompressor if using 'external' compression engine")
	errUnsupportedCompressionEngine   = errors.New("unsupported engine value for --compression-engine-name. supported values are 'external', 'pgzip', 'pargzip', 'zstd', 'lz4'")

	// engineExtensions maps a file extension to the built-in engines that use
	// it. getExtensionFromEngine performs the reverse lookup (engine -> extension).
	// According to the original note it is also used by getEngineFromExtension()
	// (not visible here) to figure out which engine to use in case the user
	// didn't specify one.
	engineExtensions = map[string][]string{
		".gz":  {PgzipCompressor, PargzipCompressor},
		".lz4": {Lz4Compressor},
		".zst": {ZstdCompressor},
	}
)
    66  
    67  func init() {
    68  	for _, cmd := range []string{"vtbackup", "vtcombo", "vttablet", "vttestserver", "vtctld", "vtctldclient"} {
    69  		servenv.OnParseFor(cmd, registerBackupCompressionFlags)
    70  	}
    71  }
    72  
    73  func registerBackupCompressionFlags(fs *pflag.FlagSet) {
    74  	fs.IntVar(&compressionLevel, "compression-level", compressionLevel, "what level to pass to the compressor.")
    75  	fs.StringVar(&CompressionEngineName, "compression-engine-name", CompressionEngineName, "compressor engine used for compression.")
    76  	fs.StringVar(&ExternalCompressorCmd, "external-compressor", ExternalCompressorCmd, "command with arguments to use when compressing a backup.")
    77  	fs.StringVar(&ExternalCompressorExt, "external-compressor-extension", ExternalCompressorExt, "extension to use when using an external compressor.")
    78  	fs.StringVar(&ExternalDecompressorCmd, "external-decompressor", ExternalDecompressorCmd, "command with arguments to use when decompressing a backup.")
    79  }
    80  
    81  func getExtensionFromEngine(engine string) (string, error) {
    82  	for ext, eng := range engineExtensions {
    83  		for _, e := range eng {
    84  			if e == engine {
    85  				return ext, nil
    86  			}
    87  		}
    88  	}
    89  	return "", fmt.Errorf("%w %q", errUnsupportedCompressionEngine, engine)
    90  }
    91  
    92  // Validates if the external decompressor exists and return its path.
    93  func validateExternalCmd(cmd string) (string, error) {
    94  	if cmd == "" {
    95  		return "", errors.New("external command is empty")
    96  	}
    97  	return exec.LookPath(cmd)
    98  }
    99  
   100  // Validate compression engine is one of the supported values.
   101  func validateExternalCompressionEngineName(engine string) error {
   102  	switch engine {
   103  	case PgzipCompressor:
   104  	case PargzipCompressor:
   105  	case Lz4Compressor:
   106  	case ZstdCompressor:
   107  	case ExternalCompressor:
   108  	default:
   109  		return fmt.Errorf("%w value: %q", errUnsupportedCompressionEngine, engine)
   110  	}
   111  
   112  	return nil
   113  }
   114  
   115  func prepareExternalCmd(ctx context.Context, cmdStr string) (*exec.Cmd, error) {
   116  	cmdArgs, err := shlex.Split(cmdStr)
   117  	if err != nil {
   118  		return nil, err
   119  	}
   120  	if len(cmdArgs) < 1 {
   121  		return nil, errors.New("external command is empty")
   122  	}
   123  	cmdPath, err := validateExternalCmd(cmdArgs[0])
   124  	if err != nil {
   125  		return nil, err
   126  	}
   127  	return exec.CommandContext(ctx, cmdPath, cmdArgs[1:]...), nil
   128  }
   129  
   130  // This returns a writer that writes the compressed output of the external command to the provided writer.
   131  func newExternalCompressor(ctx context.Context, cmdStr string, writer io.Writer, logger logutil.Logger) (io.WriteCloser, error) {
   132  	logger.Infof("Compressing using external command: %q", cmdStr)
   133  	// validate value of compression engine name
   134  	if err := validateExternalCompressionEngineName(CompressionEngineName); err != nil {
   135  		return nil, err
   136  	}
   137  
   138  	cmd, err := prepareExternalCmd(ctx, cmdStr)
   139  	if err != nil {
   140  		return nil, vterrors.Wrap(err, "unable to start external command")
   141  	}
   142  	compressor := &externalCompressor{cmd: cmd}
   143  	cmd.Stdout = writer
   144  	cmdIn, err := cmd.StdinPipe()
   145  	if err != nil {
   146  		return nil, vterrors.Wrap(err, "cannot create external ompressor stdin pipe")
   147  	}
   148  	compressor.stdin = cmdIn
   149  	cmdErr, err := cmd.StderrPipe()
   150  	if err != nil {
   151  		return nil, vterrors.Wrap(err, "cannot create external ompressor stderr pipe")
   152  	}
   153  
   154  	if err := cmd.Start(); err != nil {
   155  		return nil, vterrors.Wrap(err, "can't start external decompressor")
   156  	}
   157  
   158  	compressor.wg.Add(1) // we wait for the gorouting to finish when we call Close() on the writer
   159  	go scanLinesToLogger("compressor stderr", cmdErr, logger, compressor.wg.Done)
   160  	return compressor, nil
   161  }
   162  
   163  // This returns a reader that reads the compressed input and passes it to the external command to be decompressed. Calls to its
   164  // Read() will return the uncompressed data until EOF.
   165  func newExternalDecompressor(ctx context.Context, cmdStr string, reader io.Reader, logger logutil.Logger) (io.ReadCloser, error) {
   166  	logger.Infof("Decompressing using external command: %q", cmdStr)
   167  
   168  	cmd, err := prepareExternalCmd(ctx, cmdStr)
   169  	if err != nil {
   170  		return nil, vterrors.Wrap(err, "unable to start external command")
   171  	}
   172  	decompressor := &externalDecompressor{cmd: cmd}
   173  	cmd.Stdin = reader
   174  	cmdOut, err := cmd.StdoutPipe()
   175  	if err != nil {
   176  		return nil, vterrors.Wrap(err, "cannot create external decompressor stdout pipe")
   177  	}
   178  	decompressor.stdout = cmdOut
   179  	cmdErr, err := cmd.StderrPipe()
   180  	if err != nil {
   181  		return nil, vterrors.Wrap(err, "cannot create external decompressor stderr pipe")
   182  	}
   183  
   184  	if err := cmd.Start(); err != nil {
   185  		return nil, vterrors.Wrap(err, "can't start external decompressor")
   186  	}
   187  
   188  	decompressor.wg.Add(1) // we wait for the gorouting to finish when we call Close() on the reader
   189  	go scanLinesToLogger("decompressor stderr", cmdErr, logger, decompressor.wg.Done)
   190  	return decompressor, nil
   191  }
   192  
   193  // This returns a reader that will decompress the underlying provided reader and will use the specified supported engine.
   194  func newBuiltinDecompressor(engine string, reader io.Reader, logger logutil.Logger) (decompressor io.ReadCloser, err error) {
   195  	if engine == PargzipCompressor {
   196  		logger.Warningf(`engine "pargzip" doesn't support decompression, using "pgzip" instead`)
   197  		engine = PgzipCompressor
   198  	}
   199  
   200  	switch engine {
   201  	case PgzipCompressor:
   202  		d, err := pgzip.NewReader(reader)
   203  		if err != nil {
   204  			return nil, err
   205  		}
   206  		decompressor = d
   207  	case "lz4":
   208  		decompressor = io.NopCloser(lz4.NewReader(reader))
   209  	case "zstd":
   210  		d, err := zstd.NewReader(reader)
   211  		if err != nil {
   212  			return nil, err
   213  		}
   214  		decompressor = d.IOReadCloser()
   215  	default:
   216  		err = fmt.Errorf("Unkown decompressor engine: %q", engine)
   217  		return decompressor, err
   218  	}
   219  
   220  	logger.Infof("Decompressing backup using engine %q", engine)
   221  	return decompressor, err
   222  }
   223  
   224  // This returns a writer that will compress the data using the specified engine before writing to the underlying writer.
   225  func newBuiltinCompressor(engine string, writer io.Writer, logger logutil.Logger) (compressor io.WriteCloser, err error) {
   226  	switch engine {
   227  	case PgzipCompressor:
   228  		gzip, err := pgzip.NewWriterLevel(writer, compressionLevel)
   229  		if err != nil {
   230  			return compressor, vterrors.Wrap(err, "cannot create gzip compressor")
   231  		}
   232  		gzip.SetConcurrency(backupCompressBlockSize, backupCompressBlocks)
   233  		compressor = gzip
   234  	case PargzipCompressor:
   235  		gzip := pargzip.NewWriter(writer)
   236  		gzip.ChunkSize = backupCompressBlockSize
   237  		gzip.Parallel = backupCompressBlocks
   238  		gzip.CompressionLevel = compressionLevel
   239  		compressor = gzip
   240  	case Lz4Compressor:
   241  		lz4Writer := lz4.NewWriter(writer).WithConcurrency(backupCompressBlocks)
   242  		lz4Writer.Header = lz4.Header{
   243  			CompressionLevel: compressionLevel,
   244  		}
   245  		compressor = lz4Writer
   246  	case ZstdCompressor:
   247  		zst, err := zstd.NewWriter(writer, zstd.WithEncoderLevel(zstd.EncoderLevel(compressionLevel)))
   248  		if err != nil {
   249  			return compressor, vterrors.Wrap(err, "cannot create zstd compressor")
   250  		}
   251  		compressor = zst
   252  	default:
   253  		err = fmt.Errorf("%w value: %q", errUnsupportedCompressionEngine, engine)
   254  		return compressor, err
   255  	}
   256  
   257  	logger.Infof("Compressing backup using engine %q", engine)
   258  	return
   259  }
   260  
   261  // This struct wraps the underlying exec.Cmd and implements the io.WriteCloser interface.
   262  type externalCompressor struct {
   263  	cmd   *exec.Cmd
   264  	stdin io.WriteCloser
   265  	wg    sync.WaitGroup
   266  }
   267  
   268  func (e *externalCompressor) Write(p []byte) (n int, err error) {
   269  	return e.stdin.Write(p)
   270  }
   271  
   272  func (e *externalCompressor) Close() error {
   273  	if err := e.stdin.Close(); err != nil {
   274  		return err
   275  	}
   276  
   277  	// wait for the stderr to finish reading as well
   278  	e.wg.Wait()
   279  	return e.cmd.Wait()
   280  }
   281  
   282  // This struct wraps the underlying exec.Cmd and implements the io.ReadCloser interface.
   283  type externalDecompressor struct {
   284  	cmd    *exec.Cmd
   285  	stdout io.ReadCloser
   286  	wg     sync.WaitGroup
   287  }
   288  
   289  func (e *externalDecompressor) Read(p []byte) (n int, err error) {
   290  	return e.stdout.Read(p)
   291  }
   292  
   293  func (e *externalDecompressor) Close() error {
   294  	// wait for the stderr to finish reading as well
   295  	e.wg.Wait()
   296  
   297  	// exec.Cmd.Wait() will also close the stdout pipe, so we don't need to call it directly
   298  	return e.cmd.Wait()
   299  }