vitess.io/vitess@v0.16.2/go/vt/mysqlctl/backup.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package mysqlctl 18 19 import ( 20 "errors" 21 "fmt" 22 "os" 23 "path/filepath" 24 "strconv" 25 "strings" 26 "time" 27 28 "github.com/spf13/pflag" 29 30 "vitess.io/vitess/go/vt/servenv" 31 32 "context" 33 34 "vitess.io/vitess/go/mysql" 35 "vitess.io/vitess/go/stats" 36 "vitess.io/vitess/go/vt/log" 37 "vitess.io/vitess/go/vt/mysqlctl/backupstorage" 38 "vitess.io/vitess/go/vt/proto/vtrpc" 39 "vitess.io/vitess/go/vt/topo/topoproto" 40 "vitess.io/vitess/go/vt/vterrors" 41 42 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 43 ) 44 45 // This file handles the backup and restore related code 46 47 const ( 48 // the three bases for files to restore 49 backupInnodbDataHomeDir = "InnoDBData" 50 backupInnodbLogGroupHomeDir = "InnoDBLog" 51 backupBinlogDir = "BinLog" 52 backupData = "Data" 53 54 // backupManifestFileName is the MANIFEST file name within a backup. 55 backupManifestFileName = "MANIFEST" 56 // RestoreState is the name of the sentinel file used to detect whether a previous restore 57 // terminated abnormally 58 RestoreState = "restore_in_progress" 59 // BackupTimestampFormat is the format in which we save BackupTime and FinishedTime 60 BackupTimestampFormat = "2006-01-02.150405" 61 ) 62 63 const ( 64 // replicationStartDeadline is the deadline for starting replication 65 replicationStartDeadline = 30 66 ) 67 68 var ( 69 // ErrNoBackup is returned when there is no backup. 70 ErrNoBackup = errors.New("no available backup") 71 72 // ErrNoCompleteBackup is returned when there is at least one backup, 73 // but none of them are complete. 74 ErrNoCompleteBackup = errors.New("backup(s) found but none are complete") 75 76 // backupStorageCompress can be set to false to not use gzip 77 // on the backups. 78 backupStorageCompress = true 79 80 // backupCompressBlockSize is the splitting size for each 81 // compressed block 82 backupCompressBlockSize = 250000 83 84 // backupCompressBlocks is the number of blocks that are processed 85 // once before the writer blocks 86 backupCompressBlocks = 2 87 88 backupDuration = stats.NewGauge("backup_duration_seconds", "How long it took to complete the last backup operation (in seconds)") 89 restoreDuration = stats.NewGauge("restore_duration_seconds", "How long it took to complete the last restore operation (in seconds)") 90 ) 91 92 func init() { 93 for _, cmd := range []string{"vtcombo", "vttablet", "vttestserver", "vtbackup", "vtctld"} { 94 servenv.OnParseFor(cmd, registerBackupFlags) 95 } 96 } 97 98 func registerBackupFlags(fs *pflag.FlagSet) { 99 fs.BoolVar(&backupStorageCompress, "backup_storage_compress", backupStorageCompress, "if set, the backup files will be compressed.") 100 fs.IntVar(&backupCompressBlockSize, "backup_storage_block_size", backupCompressBlockSize, "if backup_storage_compress is true, backup_storage_block_size sets the byte size for each block while compressing (default is 250000).") 101 fs.IntVar(&backupCompressBlocks, "backup_storage_number_blocks", backupCompressBlocks, "if backup_storage_compress is true, backup_storage_number_blocks sets the number of blocks that can be processed, at once, before the writer blocks, during compression (default is 2). It should be equal to the number of CPUs available for compression.") 102 } 103 104 // Backup is the main entry point for a backup: 105 // - uses the BackupStorage service to store a new backup 106 // - shuts down Mysqld during the backup 107 // - remember if we were replicating, restore the exact same state 108 func Backup(ctx context.Context, params BackupParams) error { 109 startTs := time.Now() 110 backupDir := GetBackupDir(params.Keyspace, params.Shard) 111 name := fmt.Sprintf("%v.%v", params.BackupTime.UTC().Format(BackupTimestampFormat), params.TabletAlias) 112 // Start the backup with the BackupStorage. 113 bs, err := backupstorage.GetBackupStorage() 114 if err != nil { 115 return vterrors.Wrap(err, "unable to get backup storage") 116 } 117 defer bs.Close() 118 bh, err := bs.StartBackup(ctx, backupDir, name) 119 if err != nil { 120 return vterrors.Wrap(err, "StartBackup failed") 121 } 122 123 be, err := GetBackupEngine() 124 if err != nil { 125 return vterrors.Wrap(err, "failed to find backup engine") 126 } 127 128 // Take the backup, and either AbortBackup or EndBackup. 129 usable, err := be.ExecuteBackup(ctx, params, bh) 130 logger := params.Logger 131 var finishErr error 132 if usable { 133 finishErr = bh.EndBackup(ctx) 134 } else { 135 logger.Errorf2(err, "backup is not usable, aborting it") 136 finishErr = bh.AbortBackup(ctx) 137 } 138 if err != nil { 139 if finishErr != nil { 140 // We have a backup error, and we also failed 141 // to finish the backup: just log the backup 142 // finish error, return the backup error. 143 logger.Errorf2(finishErr, "failed to finish backup: %v") 144 } 145 return err 146 } 147 148 // The backup worked, so just return the finish error, if any. 149 backupDuration.Set(int64(time.Since(startTs).Seconds())) 150 return finishErr 151 } 152 153 // ParseBackupName parses the backup name for a given dir/name, according to 154 // the format generated by mysqlctl.Backup. An error is returned only if the 155 // backup name does not have the expected number of parts; errors parsing the 156 // timestamp and tablet alias are logged, and a nil value is returned for those 157 // fields in case of error. 158 func ParseBackupName(dir string, name string) (backupTime *time.Time, alias *topodatapb.TabletAlias, err error) { 159 parts := strings.Split(name, ".") 160 if len(parts) != 3 { 161 return nil, nil, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "cannot backup name %s, expected <date>.<time>.<tablet_alias>", name) 162 } 163 164 // parts[0]: date part of BackupTimestampFormat 165 // parts[1]: time part of BackupTimestampFormat 166 // parts[2]: tablet alias 167 timestamp := strings.Join(parts[:2], ".") 168 aliasStr := parts[2] 169 170 btime, err := time.Parse(BackupTimestampFormat, timestamp) 171 if err != nil { 172 log.Errorf("error parsing backup time for %s/%s: %s", dir, name, err) 173 } else { 174 backupTime = &btime 175 } 176 177 alias, err = topoproto.ParseTabletAlias(aliasStr) 178 if err != nil { 179 log.Errorf("error parsing tablet alias for %s/%s: %s", dir, name, err) 180 alias = nil 181 } 182 183 return backupTime, alias, nil 184 } 185 186 // checkNoDB makes sure there is no user data already there. 187 // Used by Restore, as we do not want to destroy an existing DB. 188 // The user's database name must be given since we ignore all others. 189 // Returns (true, nil) if the specified DB doesn't exist. 190 // Returns (false, nil) if the check succeeds but the condition is not 191 // satisfied (there is a DB). 192 // Returns (false, non-nil error) if one occurs while trying to perform the check. 193 func checkNoDB(ctx context.Context, mysqld MysqlDaemon, dbName string) (bool, error) { 194 qr, err := mysqld.FetchSuperQuery(ctx, "SHOW DATABASES") 195 if err != nil { 196 return false, vterrors.Wrap(err, "checkNoDB failed") 197 } 198 199 for _, row := range qr.Rows { 200 if row[0].ToString() == dbName { 201 // found active db 202 log.Warningf("checkNoDB failed, found active db %v", dbName) 203 return false, nil 204 } 205 } 206 return true, nil 207 } 208 209 // removeExistingFiles will delete existing files in the data dir to prevent 210 // conflicts with the restored archive. In particular, binlogs can be created 211 // even during initial bootstrap, and these can interfere with configuring 212 // replication if kept around after the restore. 213 func removeExistingFiles(cnf *Mycnf) error { 214 paths := map[string]string{ 215 "BinLogPath.*": cnf.BinLogPath, 216 "DataDir": cnf.DataDir, 217 "InnodbDataHomeDir": cnf.InnodbDataHomeDir, 218 "InnodbLogGroupHomeDir": cnf.InnodbLogGroupHomeDir, 219 "RelayLogPath.*": cnf.RelayLogPath, 220 "RelayLogIndexPath": cnf.RelayLogIndexPath, 221 "RelayLogInfoPath": cnf.RelayLogInfoPath, 222 } 223 for name, path := range paths { 224 if path == "" { 225 return vterrors.Errorf(vtrpc.Code_UNKNOWN, "can't remove existing files: %v is unknown", name) 226 } 227 228 if strings.HasSuffix(name, ".*") { 229 // These paths are actually filename prefixes, not directories. 230 // An extension of the form ".###" is appended by mysqld. 231 path += ".*" 232 log.Infof("Restore: removing files in %v (%v)", name, path) 233 matches, err := filepath.Glob(path) 234 if err != nil { 235 return vterrors.Wrapf(err, "can't expand path glob %q", path) 236 } 237 for _, match := range matches { 238 if err := os.Remove(match); err != nil { 239 return vterrors.Wrapf(err, "can't remove existing file from %v (%v)", name, match) 240 } 241 } 242 continue 243 } 244 245 // Regular directory: delete recursively. 246 if _, err := os.Stat(path); os.IsNotExist(err) { 247 log.Infof("Restore: skipping removal of nonexistent %v (%v)", name, path) 248 continue 249 } 250 log.Infof("Restore: removing files in %v (%v)", name, path) 251 if err := os.RemoveAll(path); err != nil { 252 return vterrors.Wrapf(err, "can't remove existing files in %v (%v)", name, path) 253 } 254 } 255 return nil 256 } 257 258 // ShouldRestore checks whether a database with tables already exists 259 // and returns whether a restore action should be performed 260 func ShouldRestore(ctx context.Context, params RestoreParams) (bool, error) { 261 if params.DeleteBeforeRestore || RestoreWasInterrupted(params.Cnf) { 262 return true, nil 263 } 264 params.Logger.Infof("Restore: No %v file found, checking no existing data is present", RestoreState) 265 // Wait for mysqld to be ready, in case it was launched in parallel with us. 266 // If this doesn't succeed, we should not attempt a restore 267 if err := params.Mysqld.Wait(ctx, params.Cnf); err != nil { 268 return false, err 269 } 270 return checkNoDB(ctx, params.Mysqld, params.DbName) 271 } 272 273 // Restore is the main entry point for backup restore. If there is no 274 // appropriate backup on the BackupStorage, Restore logs an error 275 // and returns ErrNoBackup. Any other error is returned. 276 func Restore(ctx context.Context, params RestoreParams) (*BackupManifest, error) { 277 startTs := time.Now() 278 // find the right backup handle: most recent one, with a MANIFEST 279 params.Logger.Infof("Restore: looking for a suitable backup to restore") 280 bs, err := backupstorage.GetBackupStorage() 281 if err != nil { 282 return nil, err 283 } 284 defer bs.Close() 285 286 // Backups are stored in a directory structure that starts with 287 // <keyspace>/<shard> 288 backupDir := GetBackupDir(params.Keyspace, params.Shard) 289 bhs, err := bs.ListBackups(ctx, backupDir) 290 if err != nil { 291 return nil, vterrors.Wrap(err, "ListBackups failed") 292 } 293 294 if len(bhs) == 0 { 295 // There are no backups (not even broken/incomplete ones). 296 params.Logger.Errorf("no backup to restore on BackupStorage for directory %v. Starting up empty.", backupDir) 297 // Wait for mysqld to be ready, in case it was launched in parallel with us. 298 if err = params.Mysqld.Wait(ctx, params.Cnf); err != nil { 299 params.Logger.Errorf("mysqld is not running: %v", err) 300 return nil, err 301 } 302 // Since this is an empty database make sure we start replication at the beginning 303 if err := params.Mysqld.ResetReplication(ctx); err != nil { 304 params.Logger.Errorf("error resetting replication: %v. Continuing", err) 305 } 306 307 // Always return ErrNoBackup 308 return nil, ErrNoBackup 309 } 310 311 restorePath, err := FindBackupToRestore(ctx, params, bhs) 312 if err != nil { 313 return nil, err 314 } 315 if restorePath.IsEmpty() { 316 // This condition should not happen; but we validate for sanity 317 return nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "empty restore path") 318 } 319 bh := restorePath.FullBackupHandle() 320 re, err := GetRestoreEngine(ctx, bh) 321 if err != nil { 322 return nil, vterrors.Wrap(err, "Failed to find restore engine") 323 } 324 params.Logger.Infof("Restore: %v", restorePath.String()) 325 if params.DryRun { 326 return nil, nil 327 } 328 manifest, err := re.ExecuteRestore(ctx, params, bh) 329 if err != nil { 330 return nil, err 331 } 332 333 // mysqld needs to be running in order for mysql_upgrade to work. 334 // If we've just restored from a backup from previous MySQL version then mysqld 335 // may fail to start due to a different structure of mysql.* tables. The flag 336 // --skip-grant-tables ensures that these tables are not read until mysql_upgrade 337 // is executed. And since with --skip-grant-tables anyone can connect to MySQL 338 // without password, we are passing --skip-networking to greatly reduce the set 339 // of those who can connect. 340 params.Logger.Infof("Restore: starting mysqld for mysql_upgrade") 341 // Note Start will use dba user for waiting, this is fine, it will be allowed. 342 err = params.Mysqld.Start(context.Background(), params.Cnf, "--skip-grant-tables", "--skip-networking") 343 if err != nil { 344 return nil, err 345 } 346 347 // We disable super_read_only, in case it is in the default MySQL startup 348 // parameters and will be blocking the writes we need to do in 349 // PopulateMetadataTables(). We do it blindly, since 350 // this will fail on MariaDB, which doesn't have super_read_only 351 // This is safe, since we're restarting MySQL after the restore anyway 352 params.Logger.Infof("Restore: disabling super_read_only") 353 if err := params.Mysqld.SetSuperReadOnly(false); err != nil { 354 if strings.Contains(err.Error(), strconv.Itoa(mysql.ERUnknownSystemVariable)) { 355 params.Logger.Warningf("Restore: server does not know about super_read_only, continuing anyway...") 356 } else { 357 params.Logger.Errorf("Restore: unexpected error while trying to set super_read_only: %v", err) 358 return nil, err 359 } 360 } 361 362 params.Logger.Infof("Restore: running mysql_upgrade") 363 if err := params.Mysqld.RunMysqlUpgrade(); err != nil { 364 return nil, vterrors.Wrap(err, "mysql_upgrade failed") 365 } 366 367 // The MySQL manual recommends restarting mysqld after running mysql_upgrade, 368 // so that any changes made to system tables take effect. 369 params.Logger.Infof("Restore: restarting mysqld after mysql_upgrade") 370 err = params.Mysqld.Shutdown(context.Background(), params.Cnf, true) 371 if err != nil { 372 return nil, err 373 } 374 err = params.Mysqld.Start(context.Background(), params.Cnf) 375 if err != nil { 376 return nil, err 377 } 378 379 if handles := restorePath.IncrementalBackupHandles(); len(handles) > 0 { 380 params.Logger.Infof("Restore: applying %v incremental backups", len(handles)) 381 for _, bh := range handles { 382 manifest, err := re.ExecuteRestore(ctx, params, bh) 383 if err != nil { 384 return nil, err 385 } 386 params.Logger.Infof("Restore: applied incremental backup: %v", manifest.Position) 387 } 388 params.Logger.Infof("Restore: done applying incremental backups") 389 } 390 391 params.Logger.Infof("Restore: removing state file") 392 if err = removeStateFile(params.Cnf); err != nil { 393 return nil, err 394 } 395 396 restoreDuration.Set(int64(time.Since(startTs).Seconds())) 397 params.Logger.Infof("Restore: complete") 398 return manifest, nil 399 }