vitess.io/vitess@v0.16.2/go/vt/mysqlctl/builtinbackupengine.go

/*
Copyright 2019 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package mysqlctl

import (
	"bufio"
	"context"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"hash"
	"hash/crc32"
	"io"
	"os"
	"path"
	"path/filepath"
	"sync"
	"sync/atomic"
	"time"

	"github.com/spf13/pflag"

	"vitess.io/vitess/go/mysql"
	"vitess.io/vitess/go/sync2"
	"vitess.io/vitess/go/vt/concurrency"
	"vitess.io/vitess/go/vt/log"
	"vitess.io/vitess/go/vt/logutil"
	"vitess.io/vitess/go/vt/mysqlctl/backupstorage"
	"vitess.io/vitess/go/vt/proto/vtrpc"
	"vitess.io/vitess/go/vt/servenv"
	"vitess.io/vitess/go/vt/topo"
	"vitess.io/vitess/go/vt/topo/topoproto"
	"vitess.io/vitess/go/vt/vterrors"
	"vitess.io/vitess/go/vt/vttablet/tmclient"
)

const (
	builtinBackupEngineName = "builtin"
	autoIncrementalFromPos  = "auto"
	writerBufferSize        = 2 * 1024 * 1024
	dataDictionaryFile      = "mysql.ibd"
)

var (
	// BuiltinBackupMysqldTimeout is how long ExecuteBackup should wait for a response from mysqld.Shutdown.
	// It can later be extended to other calls to mysqld during backup functions.
	// Exported for testing.
	BuiltinBackupMysqldTimeout = 10 * time.Minute

	builtinBackupProgress = 5 * time.Second
)

// BuiltinBackupEngine encapsulates the logic of the builtin engine.
// It implements the BackupEngine interface and contains all the logic
// required to implement a backup/restore by copying files from and to
// the correct location / storage bucket.
type BuiltinBackupEngine struct {
}

// builtinBackupManifest represents the backup. It lists all the files, the
// Position that the backup was taken at, the compression engine used, etc.
type builtinBackupManifest struct {
	// BackupManifest is an anonymous embedding of the base manifest struct.
	BackupManifest

	// CompressionEngine stores which compression engine was originally provided
	// to compress the files. Note that if the user provided externalCompressorCmd,
	// this field contains the value 'external'. It is used during the restore
	// routine as a hint about what kind of compression was used.
	CompressionEngine string `json:",omitempty"`

	// FileEntries contains all the files in the backup
	FileEntries []FileEntry

	// SkipCompress is true if the backup files were NOT run through gzip.
	// The field is expressed as a negative because it will come through as
	// false for backups that were created before the field existed, and those
	// backups all had compression enabled.
	SkipCompress bool
}

// FileEntry is one file to backup
type FileEntry struct {
	// Base is one of:
	// - backupInnodbDataHomeDir for files that go into Mycnf.InnodbDataHomeDir
	// - backupInnodbLogGroupHomeDir for files that go into Mycnf.InnodbLogGroupHomeDir
	// - backupBinlogDir for files that go into the binlog dir (base path of Mycnf.BinLogPath)
	// - backupData for files that go into Mycnf.DataDir
	Base string

	// Name is the file name, relative to Base
	Name string

	// Hash is the hash of the final data (transformed and
	// compressed if specified) stored in the BackupStorage.
	Hash string

	// ParentPath is an optional prefix to the Base path. If empty, it is ignored. Useful
	// for writing files in a temporary directory
	ParentPath string
}

func init() {
	for _, cmd := range []string{"vtcombo", "vttablet", "vttestserver", "vtctld", "vtctldclient"} {
		servenv.OnParseFor(cmd, registerBuiltinBackupEngineFlags)
	}
}

func registerBuiltinBackupEngineFlags(fs *pflag.FlagSet) {
	fs.DurationVar(&BuiltinBackupMysqldTimeout, "builtinbackup_mysqld_timeout", BuiltinBackupMysqldTimeout, "how long to wait for mysqld to shutdown at the start of the backup.")
	fs.DurationVar(&builtinBackupProgress, "builtinbackup_progress", builtinBackupProgress, "how often to send progress updates when backing up large files.")
}

// isIncrementalBackup is a convenience function to check whether the params indicate an incremental backup request
func isIncrementalBackup(params BackupParams) bool {
	return params.IncrementalFromPos != ""
}

// fullPath returns the full path of the entry, based on its type
func (fe *FileEntry) fullPath(cnf *Mycnf) (string, error) {
	// find the root to use
	var root string
	switch fe.Base {
	case backupInnodbDataHomeDir:
		root = cnf.InnodbDataHomeDir
	case backupInnodbLogGroupHomeDir:
		root = cnf.InnodbLogGroupHomeDir
	case backupData:
		root = cnf.DataDir
	case backupBinlogDir:
		root = filepath.Dir(cnf.BinLogPath)
	default:
		return "", vterrors.Errorf(vtrpc.Code_UNKNOWN, "unknown base: %v", fe.Base)
	}

	return path.Join(fe.ParentPath, root, fe.Name), nil
}

// open attempts to open the file
func (fe *FileEntry) open(cnf *Mycnf, readOnly bool) (*os.File, error) {
	name, err := fe.fullPath(cnf)
	if err != nil {
		return nil, vterrors.Wrapf(err, "cannot evaluate full name for %v", fe.Name)
	}
	var fd *os.File
	if readOnly {
		if fd, err = os.Open(name); err != nil {
			return nil, vterrors.Wrapf(err, "cannot open source file %v", name)
		}
	} else {
		dir := path.Dir(name)
		if err := os.MkdirAll(dir, os.ModePerm); err != nil {
			return nil, vterrors.Wrapf(err, "cannot create destination directory %v", dir)
		}
		if fd, err = os.Create(name); err != nil {
			return nil, vterrors.Wrapf(err, "cannot create destination file %v", name)
		}
	}
	return fd, nil
}
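// The following is an illustrative sketch, not engine code: how a FileEntry
// resolves to a concrete path via fullPath. The Mycnf values here are
// assumptions made up for the example:
//
//	cnf := &Mycnf{DataDir: "/vt/vtdataroot/vt_0000000101/data"}
//	fe := FileEntry{Base: backupData, Name: "vt_keyspace/t1.ibd"}
//	p, _ := fe.fullPath(cnf)
//	// p == "/vt/vtdataroot/vt_0000000101/data/vt_keyspace/t1.ibd"
//
// With a non-empty ParentPath (used when restoring incremental backups into a
// temporary directory), that prefix is joined in front of the root.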
// ExecuteBackup runs a backup based on given params. This could be a full or incremental backup.
// The function returns a boolean that indicates if the backup is usable, and an overall error.
func (be *BuiltinBackupEngine) ExecuteBackup(ctx context.Context, params BackupParams, bh backupstorage.BackupHandle) (bool, error) {
	params.Logger.Infof("Executing Backup at %v for keyspace/shard %v/%v on tablet %v, concurrency: %v, compress: %v, incrementalFromPos: %v",
		params.BackupTime, params.Keyspace, params.Shard, params.TabletAlias, params.Concurrency, backupStorageCompress, params.IncrementalFromPos)

	if isIncrementalBackup(params) {
		return be.executeIncrementalBackup(ctx, params, bh)
	}
	return be.executeFullBackup(ctx, params, bh)
}

// executeIncrementalBackup runs an incremental backup, based on the given 'incremental_from_pos', which can be:
// - A valid position
// - "auto", indicating the incremental backup should begin at the last successful backup's end position.
func (be *BuiltinBackupEngine) executeIncrementalBackup(ctx context.Context, params BackupParams, bh backupstorage.BackupHandle) (bool, error) {
	if params.IncrementalFromPos == autoIncrementalFromPos {
		params.Logger.Infof("auto evaluating incremental_from_pos")
		bs, err := backupstorage.GetBackupStorage()
		if err != nil {
			return false, err
		}
		defer bs.Close()

		// Backups are stored in a directory structure that starts with
		// <keyspace>/<shard>
		backupDir := GetBackupDir(params.Keyspace, params.Shard)
		bhs, err := bs.ListBackups(ctx, backupDir)
		if err != nil {
			return false, vterrors.Wrap(err, "ListBackups failed")
		}
		_, manifest, err := FindLatestSuccessfulBackup(ctx, params.Logger, bhs)
		if err != nil {
			return false, vterrors.Wrap(err, "FindLatestSuccessfulBackup failed")
		}
		params.IncrementalFromPos = mysql.EncodePosition(manifest.Position)
		params.Logger.Infof("auto evaluated incremental_from_pos: %s", params.IncrementalFromPos)
	}

	rp, err := mysql.DecodePosition(params.IncrementalFromPos)
	if err != nil {
		return false, vterrors.Wrapf(err, "cannot decode position in incremental backup: %v", params.IncrementalFromPos)
	}
	if !rp.MatchesFlavor(mysql.Mysql56FlavorID) {
		return false, vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "incremental backup only supports MySQL GTID positions. Got: %v", params.IncrementalFromPos)
	}
	serverUUID, err := params.Mysqld.GetServerUUID(ctx)
	if err != nil {
		return false, vterrors.Wrap(err, "can't get server uuid")
	}
	gtidPurged, err := params.Mysqld.GetGTIDPurged(ctx)
	if err != nil {
		return false, vterrors.Wrap(err, "can't get gtid_purged")
	}
	rpGTID, ok := rp.GTIDSet.(mysql.Mysql56GTIDSet)
	if !ok {
		return false, vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "cannot get MySQL GTID value: %v", rpGTID)
	}
	purgedGTID, ok := gtidPurged.GTIDSet.(mysql.Mysql56GTIDSet)
	if !ok {
		return false, vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "cannot get MySQL GTID purged value: %v", gtidPurged)
	}
	// Binlogs may not contain information about purged GTIDs. e.g. some binlog.000003 may have
	// previous GTIDs like 00021324-1111-1111-1111-111111111111:30-60, i.e. the 1-29 range is missing. This can happen
	// when a server is restored from backup and set with gtid_purged != "".
	// This is fine!
	// Shortly we will compare a binlog's "Previous GTIDs" with the backup's position.
	// For the purpose of comparison, we ignore the purged GTIDs:
	binlogCompareGTID := rpGTID.Difference(purgedGTID)

	if err := params.Mysqld.FlushBinaryLogs(ctx); err != nil {
		return false, vterrors.Wrapf(err, "cannot flush binary logs in incremental backup")
	}
	binaryLogs, err := params.Mysqld.GetBinaryLogs(ctx)
	if err != nil {
		return false, vterrors.Wrapf(err, "cannot get binary logs in incremental backup")
	}
	previousGTIDs := map[string]string{}
	getPreviousGTIDs := func(ctx context.Context, binlog string) (gtids string, err error) {
		gtids, ok := previousGTIDs[binlog]
		if ok {
			// Found a cached entry! No need to query again
			return gtids, nil
		}
		gtids, err = params.Mysqld.GetPreviousGTIDs(ctx, binlog)
		if err != nil {
			return gtids, err
		}
		previousGTIDs[binlog] = gtids
		return gtids, nil
	}
	binaryLogsToBackup, incrementalBackupFromGTID, incrementalBackupToGTID, err := ChooseBinlogsForIncrementalBackup(ctx, binlogCompareGTID, binaryLogs, getPreviousGTIDs, true)
	if err != nil {
		return false, vterrors.Wrapf(err, "cannot get binary logs to backup in incremental backup")
	}
	incrementalBackupFromPosition, err := mysql.ParsePosition(mysql.Mysql56FlavorID, incrementalBackupFromGTID)
	if err != nil {
		return false, vterrors.Wrapf(err, "cannot parse position %v", incrementalBackupFromGTID)
	}
	incrementalBackupToPosition, err := mysql.ParsePosition(mysql.Mysql56FlavorID, incrementalBackupToGTID)
	if err != nil {
		return false, vterrors.Wrapf(err, "cannot parse position %v", incrementalBackupToGTID)
	}
	// It is worth explaining the difference between params.IncrementalFromPos and incrementalBackupFromPosition.
	// params.IncrementalFromPos is supplied by the user. They want an incremental backup that covers that position.
	// However, we implement incremental backups by copying complete binlog files. That position could potentially
	// be somewhere in the middle of some binlog. So we look at the earliest binlog file that covers the user's position.
	// The backup we take either starts exactly at the user's position or at some prior position, depending on where in the
	// binlog file the user's requested position is found.
	// incrementalBackupFromGTID is the "previous GTIDs" value of the first binlog file we back up.
	// It is a fact that incrementalBackupFromGTID is earlier than or equal to params.IncrementalFromPos.
	// In the backup manifest file, we document incrementalBackupFromGTID, not the user's requested position.
	if err := be.backupFiles(ctx, params, bh, incrementalBackupToPosition, mysql.Position{}, incrementalBackupFromPosition, binaryLogsToBackup, serverUUID); err != nil {
		return false, err
	}
	return true, nil
}
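// An illustrative sketch (with made-up UUIDs) of the purged-GTID adjustment
// performed above: if the backup position covers transactions 1-60 but 1-29
// were purged, the set compared against each binlog's "Previous GTIDs" is
// only 30-60:
//
//	backupPos, _ := mysql.ParsePosition(mysql.Mysql56FlavorID, "00021324-1111-1111-1111-111111111111:1-60")
//	purged, _ := mysql.ParsePosition(mysql.Mysql56FlavorID, "00021324-1111-1111-1111-111111111111:1-29")
//	compare := backupPos.GTIDSet.(mysql.Mysql56GTIDSet).Difference(purged.GTIDSet.(mysql.Mysql56GTIDSet))
//	// compare now covers 00021324-1111-1111-1111-111111111111:30-60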
// executeFullBackup returns a boolean that indicates if the backup is usable,
// and an overall error.
func (be *BuiltinBackupEngine) executeFullBackup(ctx context.Context, params BackupParams, bh backupstorage.BackupHandle) (bool, error) {
	if params.IncrementalFromPos != "" {
		return be.executeIncrementalBackup(ctx, params, bh)
	}

	// Save initial state so we can restore it.
	replicaStartRequired := false
	sourceIsPrimary := false
	readOnly := true //nolint
	var replicationPosition mysql.Position
	semiSyncSource, semiSyncReplica := params.Mysqld.SemiSyncEnabled()

	// See if we need to restart replication after backup.
	params.Logger.Infof("getting current replication status")
	replicaStatus, err := params.Mysqld.ReplicationStatus()
	switch err {
	case nil:
		replicaStartRequired = replicaStatus.Healthy() && !DisableActiveReparents
	case mysql.ErrNotReplica:
		// keep going if we're the primary, might be a degenerate case
		sourceIsPrimary = true
	default:
		return false, vterrors.Wrap(err, "can't get replica status")
	}

	// get the read-only flag
	readOnly, err = params.Mysqld.IsReadOnly()
	if err != nil {
		return false, vterrors.Wrap(err, "can't get read-only status")
	}

	// get the replication position
	if sourceIsPrimary {
		if !readOnly {
			params.Logger.Infof("turning primary read-only before backup")
			if err = params.Mysqld.SetReadOnly(true); err != nil {
				return false, vterrors.Wrap(err, "can't set read-only status")
			}
		}
		replicationPosition, err = params.Mysqld.PrimaryPosition()
		if err != nil {
			return false, vterrors.Wrap(err, "can't get position on primary")
		}
	} else {
		// This is a replica
		if err := params.Mysqld.StopReplication(params.HookExtraEnv); err != nil {
			return false, vterrors.Wrapf(err, "can't stop replica")
		}
		replicaStatus, err := params.Mysqld.ReplicationStatus()
		if err != nil {
			return false, vterrors.Wrap(err, "can't get replica status")
		}
		replicationPosition = replicaStatus.Position
	}
	params.Logger.Infof("using replication position: %v", replicationPosition)

	gtidPurgedPosition, err := params.Mysqld.GetGTIDPurged(ctx)
	if err != nil {
		return false, vterrors.Wrap(err, "can't get gtid_purged")
	}

	serverUUID, err := params.Mysqld.GetServerUUID(ctx)
	if err != nil {
		return false, vterrors.Wrap(err, "can't get server uuid")
	}

	// shutdown mysqld
	shutdownCtx, cancel := context.WithTimeout(ctx, BuiltinBackupMysqldTimeout)
	defer cancel()
	err = params.Mysqld.Shutdown(shutdownCtx, params.Cnf, true)
	if err != nil {
		return false, vterrors.Wrap(err, "can't shutdown mysqld")
	}

	// Backup everything, capture the error.
	backupErr := be.backupFiles(ctx, params, bh, replicationPosition, gtidPurgedPosition, mysql.Position{}, nil, serverUUID)
	usable := backupErr == nil

	// Try to restart mysqld, use a background context in case we timed out the original context
	err = params.Mysqld.Start(context.Background(), params.Cnf)
	if err != nil {
		return usable, vterrors.Wrap(err, "can't restart mysqld")
	}

	// And set read-only mode
	params.Logger.Infof("resetting mysqld read-only to %v", readOnly)
	if err := params.Mysqld.SetReadOnly(readOnly); err != nil {
		return usable, err
	}

	// Restore original mysqld state that we saved above.
	if semiSyncSource || semiSyncReplica {
		// Only do this if one of them was on, since both being off could mean
		// the plugin isn't even loaded, and the server variables don't exist.
		params.Logger.Infof("restoring semi-sync settings from before backup: primary=%v, replica=%v",
			semiSyncSource, semiSyncReplica)
		err := params.Mysqld.SetSemiSyncEnabled(semiSyncSource, semiSyncReplica)
		if err != nil {
			return usable, err
		}
	}
	if replicaStartRequired {
		params.Logger.Infof("restarting mysql replication")
		if err := params.Mysqld.StartReplication(params.HookExtraEnv); err != nil {
			return usable, vterrors.Wrap(err, "cannot restart replica")
		}

		// this should be quick, but we might as well just wait
		if err := WaitForReplicationStart(params.Mysqld, replicationStartDeadline); err != nil {
			return usable, vterrors.Wrap(err, "replica is not restarting")
		}

		// Wait for a reliable value for ReplicationLagSeconds from ReplicationStatus()

		// We know that we stopped at replicationPosition.
		// If PrimaryPosition is the same, that means no writes
		// have happened to the primary, so we are up-to-date.
		// Otherwise, we wait for the replica's Position to change from
		// the saved replicationPosition before proceeding.
		tmc := tmclient.NewTabletManagerClient()
		defer tmc.Close()
		remoteCtx, remoteCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
		defer remoteCancel()

		pos, err := getPrimaryPosition(remoteCtx, tmc, params.TopoServer, params.Keyspace, params.Shard)
		// If we are unable to get the primary's position, return an error.
		if err != nil {
			return usable, err
		}
		if !replicationPosition.Equal(pos) {
			for {
				if err := ctx.Err(); err != nil {
					return usable, err
				}
				status, err := params.Mysqld.ReplicationStatus()
				if err != nil {
					return usable, err
				}
				newPos := status.Position
				if !newPos.Equal(replicationPosition) {
					break
				}
				time.Sleep(1 * time.Second)
			}
		}
	}

	return usable, backupErr
}
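// backupFiles below bounds its fan-out with a counting semaphore rather than a
// fixed worker pool. A minimal sketch of the same pattern, detached from the
// backup types (process and items are stand-ins made up for this example):
//
//	sema := sync2.NewSemaphore(concurrency, 0)
//	wg := sync.WaitGroup{}
//	for i := range items {
//		wg.Add(1)
//		go func(i int) {
//			defer wg.Done()
//			sema.Acquire()
//			defer sema.Release()
//			process(items[i]) // stand-in for backing up one file
//		}(i)
//	}
//	wg.Wait()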
// backupFiles finds the list of files to backup, and creates the backup.
func (be *BuiltinBackupEngine) backupFiles(
	ctx context.Context,
	params BackupParams,
	bh backupstorage.BackupHandle,
	replicationPosition mysql.Position,
	purgedPosition mysql.Position,
	fromPosition mysql.Position,
	binlogFiles []string,
	serverUUID string,
) (finalErr error) {
	// Get the files to backup.
	// We don't care about totalSize because we add each file separately.
	var fes []FileEntry
	var err error
	if isIncrementalBackup(params) {
		fes, _, err = binlogFilesToBackup(params.Cnf, binlogFiles)
	} else {
		fes, _, err = findFilesToBackup(params.Cnf)
	}
	if err != nil {
		return vterrors.Wrap(err, "can't find files to backup")
	}
	params.Logger.Infof("found %v files to backup", len(fes))

	// Backup with the provided concurrency.
	sema := sync2.NewSemaphore(params.Concurrency, 0)
	wg := sync.WaitGroup{}
	for i := range fes {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()

			// Wait until we are ready to go, skip if we already
			// encountered an error.
			sema.Acquire()
			defer sema.Release()
			if bh.HasErrors() {
				return
			}

			// Backup the individual file.
			name := fmt.Sprintf("%v", i)
			bh.RecordError(be.backupFile(ctx, params, bh, &fes[i], name))
		}(i)
	}

	wg.Wait()

	// BackupHandle supports the ErrorRecorder interface for tracking errors
	// across any goroutines that fan out to take the backup. This means that we
	// don't need a local error recorder and can put everything through the bh.
	//
	// This handles the scenario where bh.AddFile() encounters an error asynchronously,
	// which ordinarily would be lost in the context of `be.backupFile`, i.e. if an
	// error were encountered
	// [here](https://github.com/vitessio/vitess/blob/d26b6c7975b12a87364e471e2e2dfa4e253c2a5b/go/vt/mysqlctl/s3backupstorage/s3.go#L139-L142).
	if bh.HasErrors() {
		return bh.Error()
	}

	// open the MANIFEST
	wc, err := bh.AddFile(ctx, backupManifestFileName, backupstorage.FileSizeUnknown)
	if err != nil {
		return vterrors.Wrapf(err, "cannot add %v to backup", backupManifestFileName)
	}
	defer func() {
		if closeErr := wc.Close(); finalErr == nil {
			finalErr = closeErr
		}
	}()

	// JSON-encode and write the MANIFEST
	bm := &builtinBackupManifest{
		// Common base fields
		BackupManifest: BackupManifest{
			BackupMethod:   builtinBackupEngineName,
			Position:       replicationPosition,
			PurgedPosition: purgedPosition,
			FromPosition:   fromPosition,
			Incremental:    !fromPosition.IsZero(),
			ServerUUID:     serverUUID,
			TabletAlias:    params.TabletAlias,
			Keyspace:       params.Keyspace,
			Shard:          params.Shard,
			BackupTime:     params.BackupTime.UTC().Format(time.RFC3339),
			FinishedTime:   time.Now().UTC().Format(time.RFC3339),
		},

		// Builtin-specific fields
		FileEntries:       fes,
		SkipCompress:      !backupStorageCompress,
		CompressionEngine: CompressionEngineName,
	}
	data, err := json.MarshalIndent(bm, "", "  ")
	if err != nil {
		return vterrors.Wrapf(err, "cannot JSON encode %v", backupManifestFileName)
	}
	if _, err := wc.Write([]byte(data)); err != nil {
		return vterrors.Wrapf(err, "cannot write %v", backupManifestFileName)
	}

	return nil
}

type backupPipe struct {
	filename string
	maxSize  int64

	r io.Reader
	w *bufio.Writer

	crc32  hash.Hash32
	nn     int64
	done   chan struct{}
	closed int32
}

func newBackupWriter(filename string, maxSize int64, w io.Writer) *backupPipe {
	return &backupPipe{
		crc32:    crc32.NewIEEE(),
		w:        bufio.NewWriterSize(w, writerBufferSize),
		filename: filename,
		maxSize:  maxSize,
		done:     make(chan struct{}),
	}
}

func newBackupReader(filename string, maxSize int64, r io.Reader) *backupPipe {
	return &backupPipe{
		crc32:    crc32.NewIEEE(),
		r:        r,
		filename: filename,
		done:     make(chan struct{}),
		maxSize:  maxSize,
	}
}

func (bp *backupPipe) Read(p []byte) (int, error) {
	nn, err := bp.r.Read(p)
	_, _ = bp.crc32.Write(p[:nn])
	atomic.AddInt64(&bp.nn, int64(nn))
	return nn, err
}

func (bp *backupPipe) Write(p []byte) (int, error) {
	nn, err := bp.w.Write(p)
	_, _ = bp.crc32.Write(p[:nn])
	atomic.AddInt64(&bp.nn, int64(nn))
	return nn, err
}

func (bp *backupPipe) Close() error {
	if atomic.CompareAndSwapInt32(&bp.closed, 0, 1) {
		close(bp.done)
		if bp.w != nil {
			if err := bp.w.Flush(); err != nil {
				return err
			}
		}
	}
	return nil
}

func (bp *backupPipe) HashString() string {
	return hex.EncodeToString(bp.crc32.Sum(nil))
}
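// An illustrative use of the write side of backupPipe (backupFile below wires
// this up for real; io.Discard stands in for the BackupStorage writer):
//
//	bw := newBackupWriter("example-file", int64(len(data)), io.Discard)
//	_, _ = bw.Write(data)  // hashes and counts bytes as they are written
//	_ = bw.Close()         // flushes the bufio.Writer, stops ReportProgress
//	sum := bw.HashString() // hex CRC32, recorded as FileEntry.Hash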
func (bp *backupPipe) ReportProgress(period time.Duration, logger logutil.Logger) {
	tick := time.NewTicker(period)
	defer tick.Stop()
	for {
		select {
		case <-bp.done:
			logger.Infof("Done taking Backup %q", bp.filename)
			return
		case <-tick.C:
			written := float64(atomic.LoadInt64(&bp.nn))
			if bp.maxSize == 0 {
				logger.Infof("Backup %q: %.02fkb", bp.filename, written/1024.0)
			} else {
				maxSize := float64(bp.maxSize)
				logger.Infof("Backup %q: %.02f%% (%.02f/%.02fkb)", bp.filename, 100.0*written/maxSize, written/1024.0, maxSize/1024.0)
			}
		}
	}
}

// backupFile backs up an individual file.
func (be *BuiltinBackupEngine) backupFile(ctx context.Context, params BackupParams, bh backupstorage.BackupHandle, fe *FileEntry, name string) (finalErr error) {
	// Open the source file for reading.
	source, err := fe.open(params.Cnf, true)
	if err != nil {
		return err
	}
	defer source.Close()

	fi, err := source.Stat()
	if err != nil {
		return err
	}

	params.Logger.Infof("Backing up file: %v", fe.Name)
	// Open the destination file for writing, and a buffer.
	wc, err := bh.AddFile(ctx, name, fi.Size())
	if err != nil {
		return vterrors.Wrapf(err, "cannot add file: %v,%v", name, fe.Name)
	}
	defer func(name, fileName string) {
		if rerr := wc.Close(); rerr != nil {
			if finalErr != nil {
				// We already have an error, just log this one.
				params.Logger.Errorf2(rerr, "failed to close file %v,%v", name, fe.Name)
			} else {
				finalErr = rerr
			}
		}
	}(name, fe.Name)

	bw := newBackupWriter(fe.Name, fi.Size(), wc)
	br := newBackupReader(fe.Name, fi.Size(), source)
	go br.ReportProgress(builtinBackupProgress, params.Logger)

	var writer io.Writer = bw

	// Create the compression pipe, if necessary.
	var compressor io.WriteCloser
	if backupStorageCompress {
		if ExternalCompressorCmd != "" {
			compressor, err = newExternalCompressor(ctx, ExternalCompressorCmd, writer, params.Logger)
		} else {
			compressor, err = newBuiltinCompressor(CompressionEngineName, writer, params.Logger)
		}
		if err != nil {
			return vterrors.Wrap(err, "can't create compressor")
		}
		writer = compressor
	}

	// Copy from the source file to the writer (optional compressor,
	// optional pipe, tee, output file and hasher).
	_, err = io.Copy(writer, br)
	if err != nil {
		return vterrors.Wrap(err, "cannot copy data")
	}

	// Close the compressor to flush it; after that, all data has been sent to the writer.
	if compressor != nil {
		if err = compressor.Close(); err != nil {
			return vterrors.Wrap(err, "cannot close compressor")
		}
	}

	// Close the backupPipe to finish writing on the destination.
	if err = bw.Close(); err != nil {
		return vterrors.Wrapf(err, "cannot flush destination: %v", name)
	}

	if err := br.Close(); err != nil {
		return vterrors.Wrap(err, "failed to close the source reader")
	}

	// Save the hash.
	fe.Hash = bw.HashString()
	return nil
}
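// For orientation, the write path assembled by backupFile above is,
// conceptually:
//
//	source file -> backupReader (CRC32 over raw bytes, progress reporting)
//	            -> optional compressor (builtin engine or external command)
//	            -> backupWriter (CRC32 over stored bytes, buffering)
//	            -> BackupHandle.AddFile writer
//
// Note that FileEntry.Hash records the hash of the stored (possibly
// compressed) bytes, which is what restoreFile later verifies against.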
// executeRestoreFullBackup restores the files from a full backup. The underlying mysql database service is expected to be stopped.
func (be *BuiltinBackupEngine) executeRestoreFullBackup(ctx context.Context, params RestoreParams, bh backupstorage.BackupHandle, bm builtinBackupManifest) error {
	if err := prepareToRestore(ctx, params.Cnf, params.Mysqld, params.Logger); err != nil {
		return err
	}

	params.Logger.Infof("Restore: copying %v files", len(bm.FileEntries))

	if _, err := be.restoreFiles(context.Background(), params, bh, bm); err != nil {
		// don't delete the file here because that is how we detect an interrupted restore
		return vterrors.Wrap(err, "failed to restore files")
	}
	return nil
}

// executeRestoreIncrementalBackup executes a restore of an incremental backup, and expects to run on top of a full backup's restore.
// It restores any (zero or more) binary log files and applies them onto the underlying database one at a time, but only applies those transactions
// that fall within params.RestoreToPos.GTIDSet. The rest (typically a suffix of the last binary log) are discarded.
// The underlying mysql database is expected to be up and running.
func (be *BuiltinBackupEngine) executeRestoreIncrementalBackup(ctx context.Context, params RestoreParams, bh backupstorage.BackupHandle, bm builtinBackupManifest) error {
	params.Logger.Infof("Restoring incremental backup to position: %v", bm.Position)

	createdDir, err := be.restoreFiles(context.Background(), params, bh, bm)
	defer os.RemoveAll(createdDir)
	if err != nil {
		// don't delete the file here because that is how we detect an interrupted restore
		return vterrors.Wrap(err, "failed to restore files")
	}
	mysqld, ok := params.Mysqld.(*Mysqld)
	if !ok {
		return vterrors.Errorf(vtrpc.Code_UNIMPLEMENTED, "expected: Mysqld")
	}
	for _, fe := range bm.FileEntries {
		fe.ParentPath = createdDir
		binlogFile, err := fe.fullPath(params.Cnf)
		if err != nil {
			return vterrors.Wrap(err, "failed to restore file")
		}
		if err := mysqld.applyBinlogFile(binlogFile, params.RestoreToPos.GTIDSet); err != nil {
			return vterrors.Wrap(err, "failed to apply binlog file")
		}
		defer os.Remove(binlogFile)
		params.Logger.Infof("Applied binlog file: %v", binlogFile)
	}
	params.Logger.Infof("Restored incremental backup files to: %v", createdDir)

	return nil
}

// ExecuteRestore restores from a backup. If the restore is successful
// we return the position from which replication should start;
// otherwise an error is returned.
func (be *BuiltinBackupEngine) ExecuteRestore(ctx context.Context, params RestoreParams, bh backupstorage.BackupHandle) (*BackupManifest, error) {
	var bm builtinBackupManifest

	if err := getBackupManifestInto(ctx, bh, &bm); err != nil {
		return nil, err
	}

	// mark restore as in progress
	if err := createStateFile(params.Cnf); err != nil {
		return nil, err
	}

	var err error
	if bm.Incremental {
		err = be.executeRestoreIncrementalBackup(ctx, params, bh, bm)
	} else {
		err = be.executeRestoreFullBackup(ctx, params, bh, bm)
	}
	if err != nil {
		return nil, err
	}
	params.Logger.Infof("Restore: returning replication position %v", bm.Position)
	return &bm.BackupManifest, nil
}
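// For orientation, an abbreviated MANIFEST roughly as this engine writes it;
// the values below are illustrative, not taken from a real backup:
//
//	{
//	  "BackupMethod": "builtin",
//	  "Position": "MySQL56/00021324-1111-1111-1111-111111111111:1-60",
//	  "Incremental": false,
//	  "BackupTime": "2023-01-01T00:00:00Z",
//	  "FinishedTime": "2023-01-01T00:05:00Z",
//	  "CompressionEngine": "pgzip",
//	  "FileEntries": [
//	    {"Base": "Data", "Name": "vt_keyspace/t1.ibd", "Hash": "3d08bb8f"}
//	  ],
//	  "SkipCompress": false
//	}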
// restoreFiles will copy all the files from the BackupStorage to the
// right place.
func (be *BuiltinBackupEngine) restoreFiles(ctx context.Context, params RestoreParams, bh backupstorage.BackupHandle, bm builtinBackupManifest) (createdDir string, err error) {
	// For optimization, we replace pargzip with pgzip here, so newBuiltinDecompressor doesn't have to compare and print a warning for every file.
	// Since newBuiltinDecompressor is a helper method and does not hold any state, it was hard to do this in that method itself.
	if bm.CompressionEngine == PargzipCompressor {
		params.Logger.Warningf(`engine "pargzip" doesn't support decompression, using "pgzip" instead`)
		bm.CompressionEngine = PgzipCompressor
		defer func() {
			bm.CompressionEngine = PargzipCompressor
		}()
	}

	if bm.Incremental {
		createdDir, err = os.MkdirTemp("", "restore-incremental-*")
		if err != nil {
			return "", err
		}
	}
	fes := bm.FileEntries
	sema := sync2.NewSemaphore(params.Concurrency, 0)
	rec := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}
	for i := range fes {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()

			// Wait until we are ready to go, skip if we already
			// encountered an error.
			sema.Acquire()
			defer sema.Release()
			if rec.HasErrors() {
				return
			}

			fe := &fes[i]
			fe.ParentPath = createdDir
			// And restore the file.
			name := fmt.Sprintf("%v", i)
			params.Logger.Infof("Copying file %v: %v", name, fe.Name)
			err := be.restoreFile(ctx, params, bh, fe, bm, name)
			if err != nil {
				rec.RecordError(vterrors.Wrapf(err, "can't restore file %v to %v", name, fe.Name))
			}
		}(i)
	}
	wg.Wait()
	return createdDir, rec.Error()
}
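// restoreFile below chooses its decompressor as follows (a summary only; the
// code beneath is authoritative):
//
//	manifest CompressionEngine   ExternalDecompressorCmd   decompressor used
//	"" (pre-field backups)       set or unset              builtin pgzip
//	"external"                   set                       the external command
//	"external"                   unset                     error (unsupported)
//	any builtin engine name      set or unset              that builtin engine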
// restoreFile restores an individual file.
func (be *BuiltinBackupEngine) restoreFile(ctx context.Context, params RestoreParams, bh backupstorage.BackupHandle, fe *FileEntry, bm builtinBackupManifest, name string) (finalErr error) {
	// Open the source file for reading.
	source, err := bh.ReadFile(ctx, name)
	if err != nil {
		return vterrors.Wrap(err, "can't open source file for reading")
	}
	defer source.Close()

	// Open the destination file for writing.
	dstFile, err := fe.open(params.Cnf, false)
	if err != nil {
		return vterrors.Wrap(err, "can't open destination file for writing")
	}
	defer func() {
		if cerr := dstFile.Close(); cerr != nil {
			if finalErr != nil {
				// We already have an error, just log this one.
				log.Errorf("failed to close file %v: %v", name, cerr)
			} else {
				finalErr = vterrors.Wrap(cerr, "failed to close destination file")
			}
		}
	}()

	bp := newBackupReader(name, 0, source)
	go bp.ReportProgress(builtinBackupProgress, params.Logger)

	dst := bufio.NewWriterSize(dstFile, writerBufferSize)
	var reader io.Reader = bp

	// Create the decompressor if needed.
	if !bm.SkipCompress {
		var decompressor io.ReadCloser
		deCompressionEngine := bm.CompressionEngine
		if deCompressionEngine == "" {
			// for backward compatibility
			deCompressionEngine = PgzipCompressor
		}
		if ExternalDecompressorCmd != "" {
			if deCompressionEngine == ExternalCompressor {
				deCompressionEngine = ExternalDecompressorCmd
				decompressor, err = newExternalDecompressor(ctx, deCompressionEngine, reader, params.Logger)
			} else {
				decompressor, err = newBuiltinDecompressor(deCompressionEngine, reader, params.Logger)
			}
		} else {
			if deCompressionEngine == ExternalCompressor {
				return fmt.Errorf("%w value: %q", errUnsupportedDeCompressionEngine, ExternalCompressor)
			}
			decompressor, err = newBuiltinDecompressor(deCompressionEngine, reader, params.Logger)
		}
		if err != nil {
			return vterrors.Wrap(err, "can't create decompressor")
		}

		defer func() {
			if cerr := decompressor.Close(); cerr != nil {
				params.Logger.Errorf("failed to close decompressor: %v", cerr)
				if finalErr != nil {
					// We already have an error, just log this one.
					log.Errorf("failed to close decompressor %v: %v", name, cerr)
				} else {
					finalErr = vterrors.Wrap(cerr, "failed to close decompressor")
				}
			}
		}()
		reader = decompressor
	}

	// Copy the data. Will also write to the hasher.
	if _, err = io.Copy(dst, reader); err != nil {
		return vterrors.Wrap(err, "failed to copy file contents")
	}

	// Check the hash.
	hash := bp.HashString()
	if hash != fe.Hash {
		return vterrors.Errorf(vtrpc.Code_INTERNAL, "hash mismatch for %v, got %v expected %v", fe.Name, hash, fe.Hash)
	}

	// Flush the buffer.
	if err := dst.Flush(); err != nil {
		return vterrors.Wrap(err, "failed to flush destination buffer")
	}

	if err := bp.Close(); err != nil {
		return vterrors.Wrap(err, "failed to close the source reader")
	}

	return nil
}

// ShouldDrainForBackup satisfies the BackupEngine interface.
// Backup requires the query service to be stopped, hence true.
func (be *BuiltinBackupEngine) ShouldDrainForBackup() bool {
	return true
}

func getPrimaryPosition(ctx context.Context, tmc tmclient.TabletManagerClient, ts *topo.Server, keyspace, shard string) (mysql.Position, error) {
	si, err := ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return mysql.Position{}, vterrors.Wrap(err, "can't read shard")
	}
	if topoproto.TabletAliasIsZero(si.PrimaryAlias) {
		return mysql.Position{}, fmt.Errorf("shard %v/%v has no primary", keyspace, shard)
	}
	ti, err := ts.GetTablet(ctx, si.PrimaryAlias)
	if err != nil {
		return mysql.Position{}, fmt.Errorf("can't get primary tablet record %v: %v", topoproto.TabletAliasString(si.PrimaryAlias), err)
	}
	posStr, err := tmc.PrimaryPosition(ctx, ti.Tablet)
	if err != nil {
		return mysql.Position{}, fmt.Errorf("can't get primary replication position: %v", err)
	}
	pos, err := mysql.DecodePosition(posStr)
	if err != nil {
		return mysql.Position{}, fmt.Errorf("can't decode primary replication position %q: %v", posStr, err)
	}
	return pos, nil
}

func init() {
	BackupRestoreEngineMap["builtin"] = &BuiltinBackupEngine{}
}
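// Usage note (a sketch, not part of this file's API): the init above registers
// this engine under "builtin", the default value of vttablet's
// --backup_engine_implementation flag. An illustrative invocation, with all
// other required flags elided:
//
//	vttablet ... --backup_engine_implementation=builtin \
//	    --builtinbackup_progress=10s --builtinbackup_mysqld_timeout=20m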