vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletmanager/rpc_backup.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package tabletmanager
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"time"
    23  
    24  	"vitess.io/vitess/go/vt/logutil"
    25  	"vitess.io/vitess/go/vt/mysqlctl"
    26  	"vitess.io/vitess/go/vt/topo/topoproto"
    27  	"vitess.io/vitess/go/vt/vterrors"
    28  
    29  	tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata"
    30  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    31  )
    32  
    33  const (
    34  	backupModeOnline  = "online"
    35  	backupModeOffline = "offline"
    36  )
    37  
    38  // Backup takes a db backup and sends it to the BackupStorage
    39  func (tm *TabletManager) Backup(ctx context.Context, logger logutil.Logger, req *tabletmanagerdatapb.BackupRequest) error {
    40  	if tm.Cnf == nil {
    41  		return fmt.Errorf("cannot perform backup without my.cnf, please restart vttablet with a my.cnf file specified")
    42  	}
    43  
    44  	// Check tablet type current process has.
    45  	// During a network partition it is possible that from the topology perspective this is no longer the primary,
    46  	// but the process didn't find out about this.
    47  	// It is not safe to take backups from tablet in this state
    48  	currentTablet := tm.Tablet()
    49  	if !req.AllowPrimary && currentTablet.Type == topodatapb.TabletType_PRIMARY {
    50  		return fmt.Errorf("type PRIMARY cannot take backup. if you really need to do this, rerun the backup command with --allow_primary")
    51  	}
    52  	engine, err := mysqlctl.GetBackupEngine()
    53  	if err != nil {
    54  		return vterrors.Wrap(err, "failed to find backup engine")
    55  	}
    56  	// get Tablet info from topo so that it is up to date
    57  	tablet, err := tm.TopoServer.GetTablet(ctx, tm.tabletAlias)
    58  	if err != nil {
    59  		return err
    60  	}
    61  	if !req.AllowPrimary && tablet.Type == topodatapb.TabletType_PRIMARY {
    62  		return fmt.Errorf("type PRIMARY cannot take backup. if you really need to do this, rerun the backup command with --allow_primary")
    63  	}
    64  
    65  	// prevent concurrent backups, and record stats
    66  	backupMode := backupModeOnline
    67  	if engine.ShouldDrainForBackup() {
    68  		backupMode = backupModeOffline
    69  	}
    70  	if err := tm.beginBackup(backupMode); err != nil {
    71  		return err
    72  	}
    73  	defer tm.endBackup(backupMode)
    74  
    75  	var originalType topodatapb.TabletType
    76  	if engine.ShouldDrainForBackup() {
    77  		if err := tm.lock(ctx); err != nil {
    78  			return err
    79  		}
    80  		defer tm.unlock()
    81  
    82  		tablet, err := tm.TopoServer.GetTablet(ctx, tm.tabletAlias)
    83  		if err != nil {
    84  			return err
    85  		}
    86  		originalType = tablet.Type
    87  		// update our type to BACKUP
    88  		if err := tm.changeTypeLocked(ctx, topodatapb.TabletType_BACKUP, DBActionNone, SemiSyncActionUnset); err != nil {
    89  			return err
    90  		}
    91  	}
    92  	// create the loggers: tee to console and source
    93  	l := logutil.NewTeeLogger(logutil.NewConsoleLogger(), logger)
    94  
    95  	// now we can run the backup
    96  	backupParams := mysqlctl.BackupParams{
    97  		Cnf:                tm.Cnf,
    98  		Mysqld:             tm.MysqlDaemon,
    99  		Logger:             l,
   100  		Concurrency:        int(req.Concurrency),
   101  		IncrementalFromPos: req.IncrementalFromPos,
   102  		HookExtraEnv:       tm.hookExtraEnv(),
   103  		TopoServer:         tm.TopoServer,
   104  		Keyspace:           tablet.Keyspace,
   105  		Shard:              tablet.Shard,
   106  		TabletAlias:        topoproto.TabletAliasString(tablet.Alias),
   107  		BackupTime:         time.Now(),
   108  	}
   109  
   110  	returnErr := mysqlctl.Backup(ctx, backupParams)
   111  
   112  	if engine.ShouldDrainForBackup() {
   113  		bgCtx := context.Background()
   114  		// Starting from here we won't be able to recover if we get stopped by a cancelled
   115  		// context. It is also possible that the context already timed out during the
   116  		// above call to Backup. Thus we use the background context to get through to the finish.
   117  
   118  		// Change our type back to the original value.
   119  		// Original type could be primary so pass in a real value for PrimaryTermStartTime
   120  		if err := tm.changeTypeLocked(bgCtx, originalType, DBActionNone, SemiSyncActionNone); err != nil {
   121  			// failure in changing the topology type is probably worse,
   122  			// so returning that (we logged the snapshot error anyway)
   123  			if returnErr != nil {
   124  				l.Errorf("mysql backup command returned error: %v", returnErr)
   125  			}
   126  			returnErr = err
   127  		}
   128  	}
   129  
   130  	return returnErr
   131  }
   132  
   133  // RestoreFromBackup deletes all local data and then restores the data from the latest backup [at
   134  // or before the backupTime value if specified]
   135  func (tm *TabletManager) RestoreFromBackup(ctx context.Context, logger logutil.Logger, request *tabletmanagerdatapb.RestoreFromBackupRequest) error {
   136  	if err := tm.lock(ctx); err != nil {
   137  		return err
   138  	}
   139  	defer tm.unlock()
   140  
   141  	tablet, err := tm.TopoServer.GetTablet(ctx, tm.tabletAlias)
   142  	if err != nil {
   143  		return err
   144  	}
   145  	if tablet.Type == topodatapb.TabletType_PRIMARY {
   146  		return fmt.Errorf("type PRIMARY cannot restore from backup, if you really need to do this, restart vttablet in replica mode")
   147  	}
   148  
   149  	// create the loggers: tee to console and source
   150  	l := logutil.NewTeeLogger(logutil.NewConsoleLogger(), logger)
   151  
   152  	// now we can run restore
   153  	err = tm.restoreDataLocked(ctx, l, 0 /* waitForBackupInterval */, true /* deleteBeforeRestore */, request)
   154  
   155  	// re-run health check to be sure to capture any replication delay
   156  	tm.QueryServiceControl.BroadcastHealth()
   157  
   158  	return err
   159  }
   160  
   161  func (tm *TabletManager) beginBackup(backupMode string) error {
   162  	tm.mutex.Lock()
   163  	defer tm.mutex.Unlock()
   164  	if tm._isBackupRunning {
   165  		return fmt.Errorf("a backup is already running on tablet: %v", tm.tabletAlias)
   166  	}
   167  	// when mode is online we don't take the action lock, so we continue to serve,
   168  	// but let's set _isBackupRunning to true
   169  	// so that we only allow one online backup at a time
   170  	// offline backups also run only one at a time because we take the action lock
   171  	// so this is not really needed in that case, however we are using it to record the state
   172  	tm._isBackupRunning = true
   173  	statsBackupIsRunning.Set([]string{backupMode}, 1)
   174  	return nil
   175  }
   176  
   177  func (tm *TabletManager) endBackup(backupMode string) {
   178  	// now we set _isBackupRunning back to false
   179  	// have to take the mutex lock before writing to _ fields
   180  	tm.mutex.Lock()
   181  	defer tm.mutex.Unlock()
   182  	tm._isBackupRunning = false
   183  	statsBackupIsRunning.Set([]string{backupMode}, 0)
   184  }