vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletmanager/rpc_backup.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package tabletmanager 18 19 import ( 20 "context" 21 "fmt" 22 "time" 23 24 "vitess.io/vitess/go/vt/logutil" 25 "vitess.io/vitess/go/vt/mysqlctl" 26 "vitess.io/vitess/go/vt/topo/topoproto" 27 "vitess.io/vitess/go/vt/vterrors" 28 29 tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata" 30 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 31 ) 32 33 const ( 34 backupModeOnline = "online" 35 backupModeOffline = "offline" 36 ) 37 38 // Backup takes a db backup and sends it to the BackupStorage 39 func (tm *TabletManager) Backup(ctx context.Context, logger logutil.Logger, req *tabletmanagerdatapb.BackupRequest) error { 40 if tm.Cnf == nil { 41 return fmt.Errorf("cannot perform backup without my.cnf, please restart vttablet with a my.cnf file specified") 42 } 43 44 // Check tablet type current process has. 45 // During a network partition it is possible that from the topology perspective this is no longer the primary, 46 // but the process didn't find out about this. 47 // It is not safe to take backups from tablet in this state 48 currentTablet := tm.Tablet() 49 if !req.AllowPrimary && currentTablet.Type == topodatapb.TabletType_PRIMARY { 50 return fmt.Errorf("type PRIMARY cannot take backup. if you really need to do this, rerun the backup command with --allow_primary") 51 } 52 engine, err := mysqlctl.GetBackupEngine() 53 if err != nil { 54 return vterrors.Wrap(err, "failed to find backup engine") 55 } 56 // get Tablet info from topo so that it is up to date 57 tablet, err := tm.TopoServer.GetTablet(ctx, tm.tabletAlias) 58 if err != nil { 59 return err 60 } 61 if !req.AllowPrimary && tablet.Type == topodatapb.TabletType_PRIMARY { 62 return fmt.Errorf("type PRIMARY cannot take backup. if you really need to do this, rerun the backup command with --allow_primary") 63 } 64 65 // prevent concurrent backups, and record stats 66 backupMode := backupModeOnline 67 if engine.ShouldDrainForBackup() { 68 backupMode = backupModeOffline 69 } 70 if err := tm.beginBackup(backupMode); err != nil { 71 return err 72 } 73 defer tm.endBackup(backupMode) 74 75 var originalType topodatapb.TabletType 76 if engine.ShouldDrainForBackup() { 77 if err := tm.lock(ctx); err != nil { 78 return err 79 } 80 defer tm.unlock() 81 82 tablet, err := tm.TopoServer.GetTablet(ctx, tm.tabletAlias) 83 if err != nil { 84 return err 85 } 86 originalType = tablet.Type 87 // update our type to BACKUP 88 if err := tm.changeTypeLocked(ctx, topodatapb.TabletType_BACKUP, DBActionNone, SemiSyncActionUnset); err != nil { 89 return err 90 } 91 } 92 // create the loggers: tee to console and source 93 l := logutil.NewTeeLogger(logutil.NewConsoleLogger(), logger) 94 95 // now we can run the backup 96 backupParams := mysqlctl.BackupParams{ 97 Cnf: tm.Cnf, 98 Mysqld: tm.MysqlDaemon, 99 Logger: l, 100 Concurrency: int(req.Concurrency), 101 IncrementalFromPos: req.IncrementalFromPos, 102 HookExtraEnv: tm.hookExtraEnv(), 103 TopoServer: tm.TopoServer, 104 Keyspace: tablet.Keyspace, 105 Shard: tablet.Shard, 106 TabletAlias: topoproto.TabletAliasString(tablet.Alias), 107 BackupTime: time.Now(), 108 } 109 110 returnErr := mysqlctl.Backup(ctx, backupParams) 111 112 if engine.ShouldDrainForBackup() { 113 bgCtx := context.Background() 114 // Starting from here we won't be able to recover if we get stopped by a cancelled 115 // context. It is also possible that the context already timed out during the 116 // above call to Backup. Thus we use the background context to get through to the finish. 117 118 // Change our type back to the original value. 119 // Original type could be primary so pass in a real value for PrimaryTermStartTime 120 if err := tm.changeTypeLocked(bgCtx, originalType, DBActionNone, SemiSyncActionNone); err != nil { 121 // failure in changing the topology type is probably worse, 122 // so returning that (we logged the snapshot error anyway) 123 if returnErr != nil { 124 l.Errorf("mysql backup command returned error: %v", returnErr) 125 } 126 returnErr = err 127 } 128 } 129 130 return returnErr 131 } 132 133 // RestoreFromBackup deletes all local data and then restores the data from the latest backup [at 134 // or before the backupTime value if specified] 135 func (tm *TabletManager) RestoreFromBackup(ctx context.Context, logger logutil.Logger, request *tabletmanagerdatapb.RestoreFromBackupRequest) error { 136 if err := tm.lock(ctx); err != nil { 137 return err 138 } 139 defer tm.unlock() 140 141 tablet, err := tm.TopoServer.GetTablet(ctx, tm.tabletAlias) 142 if err != nil { 143 return err 144 } 145 if tablet.Type == topodatapb.TabletType_PRIMARY { 146 return fmt.Errorf("type PRIMARY cannot restore from backup, if you really need to do this, restart vttablet in replica mode") 147 } 148 149 // create the loggers: tee to console and source 150 l := logutil.NewTeeLogger(logutil.NewConsoleLogger(), logger) 151 152 // now we can run restore 153 err = tm.restoreDataLocked(ctx, l, 0 /* waitForBackupInterval */, true /* deleteBeforeRestore */, request) 154 155 // re-run health check to be sure to capture any replication delay 156 tm.QueryServiceControl.BroadcastHealth() 157 158 return err 159 } 160 161 func (tm *TabletManager) beginBackup(backupMode string) error { 162 tm.mutex.Lock() 163 defer tm.mutex.Unlock() 164 if tm._isBackupRunning { 165 return fmt.Errorf("a backup is already running on tablet: %v", tm.tabletAlias) 166 } 167 // when mode is online we don't take the action lock, so we continue to serve, 168 // but let's set _isBackupRunning to true 169 // so that we only allow one online backup at a time 170 // offline backups also run only one at a time because we take the action lock 171 // so this is not really needed in that case, however we are using it to record the state 172 tm._isBackupRunning = true 173 statsBackupIsRunning.Set([]string{backupMode}, 1) 174 return nil 175 } 176 177 func (tm *TabletManager) endBackup(backupMode string) { 178 // now we set _isBackupRunning back to false 179 // have to take the mutex lock before writing to _ fields 180 tm.mutex.Lock() 181 defer tm.mutex.Unlock() 182 tm._isBackupRunning = false 183 statsBackupIsRunning.Set([]string{backupMode}, 0) 184 }