vitess.io/vitess@v0.16.2/go/vt/mysqlctl/backup.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package mysqlctl
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  	"os"
    23  	"path/filepath"
    24  	"strconv"
    25  	"strings"
    26  	"time"
    27  
    28  	"github.com/spf13/pflag"
    29  
    30  	"vitess.io/vitess/go/vt/servenv"
    31  
    32  	"context"
    33  
    34  	"vitess.io/vitess/go/mysql"
    35  	"vitess.io/vitess/go/stats"
    36  	"vitess.io/vitess/go/vt/log"
    37  	"vitess.io/vitess/go/vt/mysqlctl/backupstorage"
    38  	"vitess.io/vitess/go/vt/proto/vtrpc"
    39  	"vitess.io/vitess/go/vt/topo/topoproto"
    40  	"vitess.io/vitess/go/vt/vterrors"
    41  
    42  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    43  )
    44  
    45  // This file handles the backup and restore related code
    46  
const (
	// the four bases for files to restore: one directory name per category
	// of backed-up files (InnoDB data, InnoDB logs, binary logs, data dir).
	backupInnodbDataHomeDir     = "InnoDBData"
	backupInnodbLogGroupHomeDir = "InnoDBLog"
	backupBinlogDir             = "BinLog"
	backupData                  = "Data"

	// backupManifestFileName is the MANIFEST file name within a backup.
	backupManifestFileName = "MANIFEST"
	// RestoreState is the name of the sentinel file used to detect whether a previous restore
	// terminated abnormally.
	RestoreState = "restore_in_progress"
	// BackupTimestampFormat is the format in which we save BackupTime and FinishedTime.
	// It is a Go reference-time layout (date, a dot, then time of day) and is
	// also used as the timestamp prefix of the backup names built by Backup.
	BackupTimestampFormat = "2006-01-02.150405"
)
    62  
const (
	// replicationStartDeadline is the deadline for starting replication.
	// NOTE(review): the unit is not stated here (presumably seconds) —
	// confirm against the code that consumes this constant.
	replicationStartDeadline = 30
)
    67  
var (
	// ErrNoBackup is returned when there is no backup.
	ErrNoBackup = errors.New("no available backup")

	// ErrNoCompleteBackup is returned when there is at least one backup,
	// but none of them are complete.
	ErrNoCompleteBackup = errors.New("backup(s) found but none are complete")

	// backupStorageCompress can be set to false to not use gzip
	// on the backups. Bound to the backup_storage_compress flag.
	backupStorageCompress = true

	// backupCompressBlockSize is the splitting size, in bytes, for each
	// compressed block. Bound to the backup_storage_block_size flag.
	backupCompressBlockSize = 250000

	// backupCompressBlocks is the number of blocks that are processed
	// at once before the writer blocks. Bound to the
	// backup_storage_number_blocks flag.
	backupCompressBlocks = 2

	// Gauges holding the duration, in whole seconds, of the last backup and
	// restore operation. Backup and Restore set them only on their success
	// paths.
	backupDuration  = stats.NewGauge("backup_duration_seconds", "How long it took to complete the last backup operation (in seconds)")
	restoreDuration = stats.NewGauge("restore_duration_seconds", "How long it took to complete the last restore operation (in seconds)")
)
    91  
    92  func init() {
    93  	for _, cmd := range []string{"vtcombo", "vttablet", "vttestserver", "vtbackup", "vtctld"} {
    94  		servenv.OnParseFor(cmd, registerBackupFlags)
    95  	}
    96  }
    97  
    98  func registerBackupFlags(fs *pflag.FlagSet) {
    99  	fs.BoolVar(&backupStorageCompress, "backup_storage_compress", backupStorageCompress, "if set, the backup files will be compressed.")
   100  	fs.IntVar(&backupCompressBlockSize, "backup_storage_block_size", backupCompressBlockSize, "if backup_storage_compress is true, backup_storage_block_size sets the byte size for each block while compressing (default is 250000).")
   101  	fs.IntVar(&backupCompressBlocks, "backup_storage_number_blocks", backupCompressBlocks, "if backup_storage_compress is true, backup_storage_number_blocks sets the number of blocks that can be processed, at once, before the writer blocks, during compression (default is 2). It should be equal to the number of CPUs available for compression.")
   102  }
   103  
   104  // Backup is the main entry point for a backup:
   105  // - uses the BackupStorage service to store a new backup
   106  // - shuts down Mysqld during the backup
   107  // - remember if we were replicating, restore the exact same state
   108  func Backup(ctx context.Context, params BackupParams) error {
   109  	startTs := time.Now()
   110  	backupDir := GetBackupDir(params.Keyspace, params.Shard)
   111  	name := fmt.Sprintf("%v.%v", params.BackupTime.UTC().Format(BackupTimestampFormat), params.TabletAlias)
   112  	// Start the backup with the BackupStorage.
   113  	bs, err := backupstorage.GetBackupStorage()
   114  	if err != nil {
   115  		return vterrors.Wrap(err, "unable to get backup storage")
   116  	}
   117  	defer bs.Close()
   118  	bh, err := bs.StartBackup(ctx, backupDir, name)
   119  	if err != nil {
   120  		return vterrors.Wrap(err, "StartBackup failed")
   121  	}
   122  
   123  	be, err := GetBackupEngine()
   124  	if err != nil {
   125  		return vterrors.Wrap(err, "failed to find backup engine")
   126  	}
   127  
   128  	// Take the backup, and either AbortBackup or EndBackup.
   129  	usable, err := be.ExecuteBackup(ctx, params, bh)
   130  	logger := params.Logger
   131  	var finishErr error
   132  	if usable {
   133  		finishErr = bh.EndBackup(ctx)
   134  	} else {
   135  		logger.Errorf2(err, "backup is not usable, aborting it")
   136  		finishErr = bh.AbortBackup(ctx)
   137  	}
   138  	if err != nil {
   139  		if finishErr != nil {
   140  			// We have a backup error, and we also failed
   141  			// to finish the backup: just log the backup
   142  			// finish error, return the backup error.
   143  			logger.Errorf2(finishErr, "failed to finish backup: %v")
   144  		}
   145  		return err
   146  	}
   147  
   148  	// The backup worked, so just return the finish error, if any.
   149  	backupDuration.Set(int64(time.Since(startTs).Seconds()))
   150  	return finishErr
   151  }
   152  
   153  // ParseBackupName parses the backup name for a given dir/name, according to
   154  // the format generated by mysqlctl.Backup. An error is returned only if the
   155  // backup name does not have the expected number of parts; errors parsing the
   156  // timestamp and tablet alias are logged, and a nil value is returned for those
   157  // fields in case of error.
   158  func ParseBackupName(dir string, name string) (backupTime *time.Time, alias *topodatapb.TabletAlias, err error) {
   159  	parts := strings.Split(name, ".")
   160  	if len(parts) != 3 {
   161  		return nil, nil, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "cannot backup name %s, expected <date>.<time>.<tablet_alias>", name)
   162  	}
   163  
   164  	// parts[0]: date part of BackupTimestampFormat
   165  	// parts[1]: time part of BackupTimestampFormat
   166  	// parts[2]: tablet alias
   167  	timestamp := strings.Join(parts[:2], ".")
   168  	aliasStr := parts[2]
   169  
   170  	btime, err := time.Parse(BackupTimestampFormat, timestamp)
   171  	if err != nil {
   172  		log.Errorf("error parsing backup time for %s/%s: %s", dir, name, err)
   173  	} else {
   174  		backupTime = &btime
   175  	}
   176  
   177  	alias, err = topoproto.ParseTabletAlias(aliasStr)
   178  	if err != nil {
   179  		log.Errorf("error parsing tablet alias for %s/%s: %s", dir, name, err)
   180  		alias = nil
   181  	}
   182  
   183  	return backupTime, alias, nil
   184  }
   185  
   186  // checkNoDB makes sure there is no user data already there.
   187  // Used by Restore, as we do not want to destroy an existing DB.
   188  // The user's database name must be given since we ignore all others.
   189  // Returns (true, nil) if the specified DB doesn't exist.
   190  // Returns (false, nil) if the check succeeds but the condition is not
   191  // satisfied (there is a DB).
   192  // Returns (false, non-nil error) if one occurs while trying to perform the check.
   193  func checkNoDB(ctx context.Context, mysqld MysqlDaemon, dbName string) (bool, error) {
   194  	qr, err := mysqld.FetchSuperQuery(ctx, "SHOW DATABASES")
   195  	if err != nil {
   196  		return false, vterrors.Wrap(err, "checkNoDB failed")
   197  	}
   198  
   199  	for _, row := range qr.Rows {
   200  		if row[0].ToString() == dbName {
   201  			// found active db
   202  			log.Warningf("checkNoDB failed, found active db %v", dbName)
   203  			return false, nil
   204  		}
   205  	}
   206  	return true, nil
   207  }
   208  
   209  // removeExistingFiles will delete existing files in the data dir to prevent
   210  // conflicts with the restored archive. In particular, binlogs can be created
   211  // even during initial bootstrap, and these can interfere with configuring
   212  // replication if kept around after the restore.
   213  func removeExistingFiles(cnf *Mycnf) error {
   214  	paths := map[string]string{
   215  		"BinLogPath.*":          cnf.BinLogPath,
   216  		"DataDir":               cnf.DataDir,
   217  		"InnodbDataHomeDir":     cnf.InnodbDataHomeDir,
   218  		"InnodbLogGroupHomeDir": cnf.InnodbLogGroupHomeDir,
   219  		"RelayLogPath.*":        cnf.RelayLogPath,
   220  		"RelayLogIndexPath":     cnf.RelayLogIndexPath,
   221  		"RelayLogInfoPath":      cnf.RelayLogInfoPath,
   222  	}
   223  	for name, path := range paths {
   224  		if path == "" {
   225  			return vterrors.Errorf(vtrpc.Code_UNKNOWN, "can't remove existing files: %v is unknown", name)
   226  		}
   227  
   228  		if strings.HasSuffix(name, ".*") {
   229  			// These paths are actually filename prefixes, not directories.
   230  			// An extension of the form ".###" is appended by mysqld.
   231  			path += ".*"
   232  			log.Infof("Restore: removing files in %v (%v)", name, path)
   233  			matches, err := filepath.Glob(path)
   234  			if err != nil {
   235  				return vterrors.Wrapf(err, "can't expand path glob %q", path)
   236  			}
   237  			for _, match := range matches {
   238  				if err := os.Remove(match); err != nil {
   239  					return vterrors.Wrapf(err, "can't remove existing file from %v (%v)", name, match)
   240  				}
   241  			}
   242  			continue
   243  		}
   244  
   245  		// Regular directory: delete recursively.
   246  		if _, err := os.Stat(path); os.IsNotExist(err) {
   247  			log.Infof("Restore: skipping removal of nonexistent %v (%v)", name, path)
   248  			continue
   249  		}
   250  		log.Infof("Restore: removing files in %v (%v)", name, path)
   251  		if err := os.RemoveAll(path); err != nil {
   252  			return vterrors.Wrapf(err, "can't remove existing files in %v (%v)", name, path)
   253  		}
   254  	}
   255  	return nil
   256  }
   257  
   258  // ShouldRestore checks whether a database with tables already exists
   259  // and returns whether a restore action should be performed
   260  func ShouldRestore(ctx context.Context, params RestoreParams) (bool, error) {
   261  	if params.DeleteBeforeRestore || RestoreWasInterrupted(params.Cnf) {
   262  		return true, nil
   263  	}
   264  	params.Logger.Infof("Restore: No %v file found, checking no existing data is present", RestoreState)
   265  	// Wait for mysqld to be ready, in case it was launched in parallel with us.
   266  	// If this doesn't succeed, we should not attempt a restore
   267  	if err := params.Mysqld.Wait(ctx, params.Cnf); err != nil {
   268  		return false, err
   269  	}
   270  	return checkNoDB(ctx, params.Mysqld, params.DbName)
   271  }
   272  
   273  // Restore is the main entry point for backup restore.  If there is no
   274  // appropriate backup on the BackupStorage, Restore logs an error
   275  // and returns ErrNoBackup. Any other error is returned.
   276  func Restore(ctx context.Context, params RestoreParams) (*BackupManifest, error) {
   277  	startTs := time.Now()
   278  	// find the right backup handle: most recent one, with a MANIFEST
   279  	params.Logger.Infof("Restore: looking for a suitable backup to restore")
   280  	bs, err := backupstorage.GetBackupStorage()
   281  	if err != nil {
   282  		return nil, err
   283  	}
   284  	defer bs.Close()
   285  
   286  	// Backups are stored in a directory structure that starts with
   287  	// <keyspace>/<shard>
   288  	backupDir := GetBackupDir(params.Keyspace, params.Shard)
   289  	bhs, err := bs.ListBackups(ctx, backupDir)
   290  	if err != nil {
   291  		return nil, vterrors.Wrap(err, "ListBackups failed")
   292  	}
   293  
   294  	if len(bhs) == 0 {
   295  		// There are no backups (not even broken/incomplete ones).
   296  		params.Logger.Errorf("no backup to restore on BackupStorage for directory %v. Starting up empty.", backupDir)
   297  		// Wait for mysqld to be ready, in case it was launched in parallel with us.
   298  		if err = params.Mysqld.Wait(ctx, params.Cnf); err != nil {
   299  			params.Logger.Errorf("mysqld is not running: %v", err)
   300  			return nil, err
   301  		}
   302  		// Since this is an empty database make sure we start replication at the beginning
   303  		if err := params.Mysqld.ResetReplication(ctx); err != nil {
   304  			params.Logger.Errorf("error resetting replication: %v. Continuing", err)
   305  		}
   306  
   307  		// Always return ErrNoBackup
   308  		return nil, ErrNoBackup
   309  	}
   310  
   311  	restorePath, err := FindBackupToRestore(ctx, params, bhs)
   312  	if err != nil {
   313  		return nil, err
   314  	}
   315  	if restorePath.IsEmpty() {
   316  		// This condition should not happen; but we validate for sanity
   317  		return nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "empty restore path")
   318  	}
   319  	bh := restorePath.FullBackupHandle()
   320  	re, err := GetRestoreEngine(ctx, bh)
   321  	if err != nil {
   322  		return nil, vterrors.Wrap(err, "Failed to find restore engine")
   323  	}
   324  	params.Logger.Infof("Restore: %v", restorePath.String())
   325  	if params.DryRun {
   326  		return nil, nil
   327  	}
   328  	manifest, err := re.ExecuteRestore(ctx, params, bh)
   329  	if err != nil {
   330  		return nil, err
   331  	}
   332  
   333  	// mysqld needs to be running in order for mysql_upgrade to work.
   334  	// If we've just restored from a backup from previous MySQL version then mysqld
   335  	// may fail to start due to a different structure of mysql.* tables. The flag
   336  	// --skip-grant-tables ensures that these tables are not read until mysql_upgrade
   337  	// is executed. And since with --skip-grant-tables anyone can connect to MySQL
   338  	// without password, we are passing --skip-networking to greatly reduce the set
   339  	// of those who can connect.
   340  	params.Logger.Infof("Restore: starting mysqld for mysql_upgrade")
   341  	// Note Start will use dba user for waiting, this is fine, it will be allowed.
   342  	err = params.Mysqld.Start(context.Background(), params.Cnf, "--skip-grant-tables", "--skip-networking")
   343  	if err != nil {
   344  		return nil, err
   345  	}
   346  
   347  	// We disable super_read_only, in case it is in the default MySQL startup
   348  	// parameters and will be blocking the writes we need to do in
   349  	// PopulateMetadataTables().  We do it blindly, since
   350  	// this will fail on MariaDB, which doesn't have super_read_only
   351  	// This is safe, since we're restarting MySQL after the restore anyway
   352  	params.Logger.Infof("Restore: disabling super_read_only")
   353  	if err := params.Mysqld.SetSuperReadOnly(false); err != nil {
   354  		if strings.Contains(err.Error(), strconv.Itoa(mysql.ERUnknownSystemVariable)) {
   355  			params.Logger.Warningf("Restore: server does not know about super_read_only, continuing anyway...")
   356  		} else {
   357  			params.Logger.Errorf("Restore: unexpected error while trying to set super_read_only: %v", err)
   358  			return nil, err
   359  		}
   360  	}
   361  
   362  	params.Logger.Infof("Restore: running mysql_upgrade")
   363  	if err := params.Mysqld.RunMysqlUpgrade(); err != nil {
   364  		return nil, vterrors.Wrap(err, "mysql_upgrade failed")
   365  	}
   366  
   367  	// The MySQL manual recommends restarting mysqld after running mysql_upgrade,
   368  	// so that any changes made to system tables take effect.
   369  	params.Logger.Infof("Restore: restarting mysqld after mysql_upgrade")
   370  	err = params.Mysqld.Shutdown(context.Background(), params.Cnf, true)
   371  	if err != nil {
   372  		return nil, err
   373  	}
   374  	err = params.Mysqld.Start(context.Background(), params.Cnf)
   375  	if err != nil {
   376  		return nil, err
   377  	}
   378  
   379  	if handles := restorePath.IncrementalBackupHandles(); len(handles) > 0 {
   380  		params.Logger.Infof("Restore: applying %v incremental backups", len(handles))
   381  		for _, bh := range handles {
   382  			manifest, err := re.ExecuteRestore(ctx, params, bh)
   383  			if err != nil {
   384  				return nil, err
   385  			}
   386  			params.Logger.Infof("Restore: applied incremental backup: %v", manifest.Position)
   387  		}
   388  		params.Logger.Infof("Restore: done applying incremental backups")
   389  	}
   390  
   391  	params.Logger.Infof("Restore: removing state file")
   392  	if err = removeStateFile(params.Cnf); err != nil {
   393  		return nil, err
   394  	}
   395  
   396  	restoreDuration.Set(int64(time.Since(startTs).Seconds()))
   397  	params.Logger.Infof("Restore: complete")
   398  	return manifest, nil
   399  }