vitess.io/vitess@v0.16.2/go/vt/mysqlctl/compression.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package mysqlctl 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "io" 24 "os/exec" 25 "sync" 26 27 "github.com/google/shlex" 28 "github.com/klauspost/compress/zstd" 29 "github.com/klauspost/pgzip" 30 "github.com/pierrec/lz4" 31 "github.com/planetscale/pargzip" 32 "github.com/spf13/pflag" 33 34 "vitess.io/vitess/go/vt/logutil" 35 "vitess.io/vitess/go/vt/servenv" 36 "vitess.io/vitess/go/vt/vterrors" 37 ) 38 39 const ( 40 PgzipCompressor = "pgzip" 41 PargzipCompressor = "pargzip" 42 ZstdCompressor = "zstd" 43 Lz4Compressor = "lz4" 44 ExternalCompressor = "external" 45 ) 46 47 var ( 48 compressionLevel = 1 49 // CompressionEngineName specifies which compressor/decompressor to use 50 CompressionEngineName = "pargzip" 51 // ExternalCompressorCmd / ExternalDecompressorCmd specify the external commands compress/decompress the backups 52 ExternalCompressorCmd string 53 ExternalCompressorExt string 54 ExternalDecompressorCmd string 55 56 errUnsupportedDeCompressionEngine = errors.New("unsupported engine in MANIFEST. You need to provide --external-decompressor if using 'external' compression engine") 57 errUnsupportedCompressionEngine = errors.New("unsupported engine value for --compression-engine-name. supported values are 'external', 'pgzip', 'pargzip', 'zstd', 'lz4'") 58 59 // this is used by getEngineFromExtension() to figure out which engine to use in case the user didn't specify 60 engineExtensions = map[string][]string{ 61 ".gz": {PgzipCompressor, PargzipCompressor}, 62 ".lz4": {Lz4Compressor}, 63 ".zst": {ZstdCompressor}, 64 } 65 ) 66 67 func init() { 68 for _, cmd := range []string{"vtbackup", "vtcombo", "vttablet", "vttestserver", "vtctld", "vtctldclient"} { 69 servenv.OnParseFor(cmd, registerBackupCompressionFlags) 70 } 71 } 72 73 func registerBackupCompressionFlags(fs *pflag.FlagSet) { 74 fs.IntVar(&compressionLevel, "compression-level", compressionLevel, "what level to pass to the compressor.") 75 fs.StringVar(&CompressionEngineName, "compression-engine-name", CompressionEngineName, "compressor engine used for compression.") 76 fs.StringVar(&ExternalCompressorCmd, "external-compressor", ExternalCompressorCmd, "command with arguments to use when compressing a backup.") 77 fs.StringVar(&ExternalCompressorExt, "external-compressor-extension", ExternalCompressorExt, "extension to use when using an external compressor.") 78 fs.StringVar(&ExternalDecompressorCmd, "external-decompressor", ExternalDecompressorCmd, "command with arguments to use when decompressing a backup.") 79 } 80 81 func getExtensionFromEngine(engine string) (string, error) { 82 for ext, eng := range engineExtensions { 83 for _, e := range eng { 84 if e == engine { 85 return ext, nil 86 } 87 } 88 } 89 return "", fmt.Errorf("%w %q", errUnsupportedCompressionEngine, engine) 90 } 91 92 // Validates if the external decompressor exists and return its path. 93 func validateExternalCmd(cmd string) (string, error) { 94 if cmd == "" { 95 return "", errors.New("external command is empty") 96 } 97 return exec.LookPath(cmd) 98 } 99 100 // Validate compression engine is one of the supported values. 101 func validateExternalCompressionEngineName(engine string) error { 102 switch engine { 103 case PgzipCompressor: 104 case PargzipCompressor: 105 case Lz4Compressor: 106 case ZstdCompressor: 107 case ExternalCompressor: 108 default: 109 return fmt.Errorf("%w value: %q", errUnsupportedCompressionEngine, engine) 110 } 111 112 return nil 113 } 114 115 func prepareExternalCmd(ctx context.Context, cmdStr string) (*exec.Cmd, error) { 116 cmdArgs, err := shlex.Split(cmdStr) 117 if err != nil { 118 return nil, err 119 } 120 if len(cmdArgs) < 1 { 121 return nil, errors.New("external command is empty") 122 } 123 cmdPath, err := validateExternalCmd(cmdArgs[0]) 124 if err != nil { 125 return nil, err 126 } 127 return exec.CommandContext(ctx, cmdPath, cmdArgs[1:]...), nil 128 } 129 130 // This returns a writer that writes the compressed output of the external command to the provided writer. 131 func newExternalCompressor(ctx context.Context, cmdStr string, writer io.Writer, logger logutil.Logger) (io.WriteCloser, error) { 132 logger.Infof("Compressing using external command: %q", cmdStr) 133 // validate value of compression engine name 134 if err := validateExternalCompressionEngineName(CompressionEngineName); err != nil { 135 return nil, err 136 } 137 138 cmd, err := prepareExternalCmd(ctx, cmdStr) 139 if err != nil { 140 return nil, vterrors.Wrap(err, "unable to start external command") 141 } 142 compressor := &externalCompressor{cmd: cmd} 143 cmd.Stdout = writer 144 cmdIn, err := cmd.StdinPipe() 145 if err != nil { 146 return nil, vterrors.Wrap(err, "cannot create external ompressor stdin pipe") 147 } 148 compressor.stdin = cmdIn 149 cmdErr, err := cmd.StderrPipe() 150 if err != nil { 151 return nil, vterrors.Wrap(err, "cannot create external ompressor stderr pipe") 152 } 153 154 if err := cmd.Start(); err != nil { 155 return nil, vterrors.Wrap(err, "can't start external decompressor") 156 } 157 158 compressor.wg.Add(1) // we wait for the gorouting to finish when we call Close() on the writer 159 go scanLinesToLogger("compressor stderr", cmdErr, logger, compressor.wg.Done) 160 return compressor, nil 161 } 162 163 // This returns a reader that reads the compressed input and passes it to the external command to be decompressed. Calls to its 164 // Read() will return the uncompressed data until EOF. 165 func newExternalDecompressor(ctx context.Context, cmdStr string, reader io.Reader, logger logutil.Logger) (io.ReadCloser, error) { 166 logger.Infof("Decompressing using external command: %q", cmdStr) 167 168 cmd, err := prepareExternalCmd(ctx, cmdStr) 169 if err != nil { 170 return nil, vterrors.Wrap(err, "unable to start external command") 171 } 172 decompressor := &externalDecompressor{cmd: cmd} 173 cmd.Stdin = reader 174 cmdOut, err := cmd.StdoutPipe() 175 if err != nil { 176 return nil, vterrors.Wrap(err, "cannot create external decompressor stdout pipe") 177 } 178 decompressor.stdout = cmdOut 179 cmdErr, err := cmd.StderrPipe() 180 if err != nil { 181 return nil, vterrors.Wrap(err, "cannot create external decompressor stderr pipe") 182 } 183 184 if err := cmd.Start(); err != nil { 185 return nil, vterrors.Wrap(err, "can't start external decompressor") 186 } 187 188 decompressor.wg.Add(1) // we wait for the gorouting to finish when we call Close() on the reader 189 go scanLinesToLogger("decompressor stderr", cmdErr, logger, decompressor.wg.Done) 190 return decompressor, nil 191 } 192 193 // This returns a reader that will decompress the underlying provided reader and will use the specified supported engine. 194 func newBuiltinDecompressor(engine string, reader io.Reader, logger logutil.Logger) (decompressor io.ReadCloser, err error) { 195 if engine == PargzipCompressor { 196 logger.Warningf(`engine "pargzip" doesn't support decompression, using "pgzip" instead`) 197 engine = PgzipCompressor 198 } 199 200 switch engine { 201 case PgzipCompressor: 202 d, err := pgzip.NewReader(reader) 203 if err != nil { 204 return nil, err 205 } 206 decompressor = d 207 case "lz4": 208 decompressor = io.NopCloser(lz4.NewReader(reader)) 209 case "zstd": 210 d, err := zstd.NewReader(reader) 211 if err != nil { 212 return nil, err 213 } 214 decompressor = d.IOReadCloser() 215 default: 216 err = fmt.Errorf("Unkown decompressor engine: %q", engine) 217 return decompressor, err 218 } 219 220 logger.Infof("Decompressing backup using engine %q", engine) 221 return decompressor, err 222 } 223 224 // This returns a writer that will compress the data using the specified engine before writing to the underlying writer. 225 func newBuiltinCompressor(engine string, writer io.Writer, logger logutil.Logger) (compressor io.WriteCloser, err error) { 226 switch engine { 227 case PgzipCompressor: 228 gzip, err := pgzip.NewWriterLevel(writer, compressionLevel) 229 if err != nil { 230 return compressor, vterrors.Wrap(err, "cannot create gzip compressor") 231 } 232 gzip.SetConcurrency(backupCompressBlockSize, backupCompressBlocks) 233 compressor = gzip 234 case PargzipCompressor: 235 gzip := pargzip.NewWriter(writer) 236 gzip.ChunkSize = backupCompressBlockSize 237 gzip.Parallel = backupCompressBlocks 238 gzip.CompressionLevel = compressionLevel 239 compressor = gzip 240 case Lz4Compressor: 241 lz4Writer := lz4.NewWriter(writer).WithConcurrency(backupCompressBlocks) 242 lz4Writer.Header = lz4.Header{ 243 CompressionLevel: compressionLevel, 244 } 245 compressor = lz4Writer 246 case ZstdCompressor: 247 zst, err := zstd.NewWriter(writer, zstd.WithEncoderLevel(zstd.EncoderLevel(compressionLevel))) 248 if err != nil { 249 return compressor, vterrors.Wrap(err, "cannot create zstd compressor") 250 } 251 compressor = zst 252 default: 253 err = fmt.Errorf("%w value: %q", errUnsupportedCompressionEngine, engine) 254 return compressor, err 255 } 256 257 logger.Infof("Compressing backup using engine %q", engine) 258 return 259 } 260 261 // This struct wraps the underlying exec.Cmd and implements the io.WriteCloser interface. 262 type externalCompressor struct { 263 cmd *exec.Cmd 264 stdin io.WriteCloser 265 wg sync.WaitGroup 266 } 267 268 func (e *externalCompressor) Write(p []byte) (n int, err error) { 269 return e.stdin.Write(p) 270 } 271 272 func (e *externalCompressor) Close() error { 273 if err := e.stdin.Close(); err != nil { 274 return err 275 } 276 277 // wait for the stderr to finish reading as well 278 e.wg.Wait() 279 return e.cmd.Wait() 280 } 281 282 // This struct wraps the underlying exec.Cmd and implements the io.ReadCloser interface. 283 type externalDecompressor struct { 284 cmd *exec.Cmd 285 stdout io.ReadCloser 286 wg sync.WaitGroup 287 } 288 289 func (e *externalDecompressor) Read(p []byte) (n int, err error) { 290 return e.stdout.Read(p) 291 } 292 293 func (e *externalDecompressor) Close() error { 294 // wait for the stderr to finish reading as well 295 e.wg.Wait() 296 297 // exec.Cmd.Wait() will also close the stdout pipe, so we don't need to call it directly 298 return e.cmd.Wait() 299 }