vitess.io/vitess@v0.16.2/go/vt/mysqlctl/azblobbackupstorage/azblob.go (about)

     1  /*
     2  Copyright 2020 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package azblobbackupstorage implements the BackupStorage interface
    18  // for Azure Blob Storage
    19  package azblobbackupstorage
    20  
    21  import (
    22  	"context"
    23  	"fmt"
    24  	"io"
    25  	"net/url"
    26  	"os"
    27  	"strings"
    28  	"sync"
    29  	"time"
    30  
    31  	"github.com/Azure/azure-pipeline-go/pipeline"
    32  	"github.com/Azure/azure-storage-blob-go/azblob"
    33  	"github.com/spf13/pflag"
    34  
    35  	"vitess.io/vitess/go/vt/concurrency"
    36  	"vitess.io/vitess/go/vt/log"
    37  	"vitess.io/vitess/go/vt/mysqlctl/backupstorage"
    38  	"vitess.io/vitess/go/vt/servenv"
    39  )
    40  
    41  var (
    42  	// This is the account name
    43  	accountName string
    44  
    45  	// This is the private access key
    46  	accountKeyFile string
    47  
    48  	// This is the name of the container that will store the backups
    49  	containerName string
    50  
    51  	// This is an optional prefix to prepend to all files
    52  	storageRoot string
    53  
    54  	azBlobParallelism int
    55  )
    56  
    57  func registerFlags(fs *pflag.FlagSet) {
    58  	fs.StringVar(&accountName, "azblob_backup_account_name", "", "Azure Storage Account name for backups; if this flag is unset, the environment variable VT_AZBLOB_ACCOUNT_NAME will be used.")
    59  	fs.StringVar(&accountKeyFile, "azblob_backup_account_key_file", "", "Path to a file containing the Azure Storage account key; if this flag is unset, the environment variable VT_AZBLOB_ACCOUNT_KEY will be used as the key itself (NOT a file path).")
    60  	fs.StringVar(&containerName, "azblob_backup_container_name", "", "Azure Blob Container Name.")
    61  	fs.StringVar(&storageRoot, "azblob_backup_storage_root", "", "Root prefix for all backup-related Azure Blobs; this should exclude both initial and trailing '/' (e.g. just 'a/b' not '/a/b/').")
    62  	fs.IntVar(&azBlobParallelism, "azblob_backup_parallelism", 1, "Azure Blob operation parallelism (requires extra memory when increased).")
    63  }
    64  
    65  func init() {
    66  	servenv.OnParseFor("vtbackup", registerFlags)
    67  	servenv.OnParseFor("vtctl", registerFlags)
    68  	servenv.OnParseFor("vtctld", registerFlags)
    69  	servenv.OnParseFor("vttablet", registerFlags)
    70  }
    71  
    72  const (
    73  	defaultRetryCount = 5
    74  	delimiter         = "/"
    75  )
    76  
    77  // Return a Shared credential from the available credential sources.
    78  // We will use credentials in the following order
    79  // 1. Direct Command Line Flag (azblob_backup_account_name, azblob_backup_account_key)
    80  // 2. Environment variables
    81  func azInternalCredentials() (string, string, error) {
    82  	actName := accountName
    83  	if actName == "" {
    84  		// Check the Environmental Value
    85  		actName = os.Getenv("VT_AZBLOB_ACCOUNT_NAME")
    86  	}
    87  
    88  	var actKey string
    89  	if accountKeyFile != "" {
    90  		log.Infof("Getting Azure Storage Account key from file: %s", accountKeyFile)
    91  		dat, err := os.ReadFile(accountKeyFile)
    92  		if err != nil {
    93  			return "", "", err
    94  		}
    95  		actKey = string(dat)
    96  	} else {
    97  		actKey = os.Getenv("VT_AZBLOB_ACCOUNT_KEY")
    98  	}
    99  
   100  	if actName == "" || actKey == "" {
   101  		return "", "", fmt.Errorf("Azure Storage Account credentials not found in command-line flags or environment variables")
   102  	}
   103  	return actName, actKey, nil
   104  }
   105  
   106  func azCredentials() (*azblob.SharedKeyCredential, error) {
   107  	actName, actKey, err := azInternalCredentials()
   108  	if err != nil {
   109  		return nil, err
   110  	}
   111  	return azblob.NewSharedKeyCredential(actName, actKey)
   112  }
   113  
   114  func azServiceURL(credentials *azblob.SharedKeyCredential) azblob.ServiceURL {
   115  	pipeline := azblob.NewPipeline(credentials, azblob.PipelineOptions{
   116  		Retry: azblob.RetryOptions{
   117  			Policy:   azblob.RetryPolicyFixed,
   118  			MaxTries: defaultRetryCount,
   119  			// Per https://godoc.org/github.com/Azure/azure-storage-blob-go/azblob#RetryOptions
   120  			// this should be set to a very nigh number (they claim 60s per MB).
   121  			// That could end up being days so we are limiting this to four hours.
   122  			TryTimeout: 4 * time.Hour,
   123  		},
   124  		Log: pipeline.LogOptions{
   125  			Log: func(level pipeline.LogLevel, message string) {
   126  				switch level {
   127  				case pipeline.LogFatal, pipeline.LogPanic:
   128  					log.Fatal(message)
   129  				case pipeline.LogError:
   130  					log.Error(message)
   131  				case pipeline.LogWarning:
   132  					log.Warning(message)
   133  				case pipeline.LogInfo, pipeline.LogDebug:
   134  					log.Info(message)
   135  				}
   136  			},
   137  			ShouldLog: func(level pipeline.LogLevel) bool {
   138  				switch level {
   139  				case pipeline.LogFatal, pipeline.LogPanic:
   140  					return bool(log.V(3))
   141  				case pipeline.LogError:
   142  					return bool(log.V(3))
   143  				case pipeline.LogWarning:
   144  					return bool(log.V(2))
   145  				case pipeline.LogInfo, pipeline.LogDebug:
   146  					return bool(log.V(1))
   147  				}
   148  				return false
   149  			},
   150  		},
   151  	})
   152  	u := url.URL{
   153  		Scheme: "https",
   154  		Host:   credentials.AccountName() + ".blob.core.windows.net",
   155  		Path:   "/",
   156  	}
   157  	return azblob.NewServiceURL(u, pipeline)
   158  }
   159  
   160  // AZBlobBackupHandle implements BackupHandle for Azure Blob service.
   161  type AZBlobBackupHandle struct {
   162  	bs        *AZBlobBackupStorage
   163  	dir       string
   164  	name      string
   165  	readOnly  bool
   166  	waitGroup sync.WaitGroup
   167  	errors    concurrency.AllErrorRecorder
   168  	ctx       context.Context
   169  	cancel    context.CancelFunc
   170  }
   171  
   172  // Directory implements BackupHandle.
   173  func (bh *AZBlobBackupHandle) Directory() string {
   174  	return bh.dir
   175  }
   176  
   177  // Name implements BackupHandle.
   178  func (bh *AZBlobBackupHandle) Name() string {
   179  	return bh.name
   180  }
   181  
   182  // RecordError is part of the concurrency.ErrorRecorder interface.
   183  func (bh *AZBlobBackupHandle) RecordError(err error) {
   184  	bh.errors.RecordError(err)
   185  }
   186  
   187  // HasErrors is part of the concurrency.ErrorRecorder interface.
   188  func (bh *AZBlobBackupHandle) HasErrors() bool {
   189  	return bh.errors.HasErrors()
   190  }
   191  
   192  // Error is part of the concurrency.ErrorRecorder interface.
   193  func (bh *AZBlobBackupHandle) Error() error {
   194  	return bh.errors.Error()
   195  }
   196  
   197  // AddFile implements BackupHandle.
   198  func (bh *AZBlobBackupHandle) AddFile(ctx context.Context, filename string, filesize int64) (io.WriteCloser, error) {
   199  	if bh.readOnly {
   200  		return nil, fmt.Errorf("AddFile cannot be called on read-only backup")
   201  	}
   202  	// Error out if the file size it too large ( ~4.75 TB)
   203  	if filesize > azblob.BlockBlobMaxStageBlockBytes*azblob.BlockBlobMaxBlocks {
   204  		return nil, fmt.Errorf("filesize (%v) is too large to upload to az blob (max size %v)", filesize, azblob.BlockBlobMaxStageBlockBytes*azblob.BlockBlobMaxBlocks)
   205  	}
   206  
   207  	obj := objName(bh.dir, bh.name, filename)
   208  	containerURL, err := bh.bs.containerURL()
   209  	if err != nil {
   210  		return nil, err
   211  	}
   212  
   213  	blockBlobURL := containerURL.NewBlockBlobURL(obj)
   214  
   215  	reader, writer := io.Pipe()
   216  	bh.waitGroup.Add(1)
   217  
   218  	go func() {
   219  		defer bh.waitGroup.Done()
   220  		_, err := azblob.UploadStreamToBlockBlob(bh.ctx, reader, blockBlobURL, azblob.UploadStreamToBlockBlobOptions{
   221  			BufferSize: azblob.BlockBlobMaxStageBlockBytes,
   222  			MaxBuffers: azBlobParallelism,
   223  		})
   224  		if err != nil {
   225  			reader.CloseWithError(err)
   226  			bh.RecordError(err)
   227  		}
   228  	}()
   229  
   230  	return writer, nil
   231  }
   232  
   233  // EndBackup implements BackupHandle.
   234  func (bh *AZBlobBackupHandle) EndBackup(ctx context.Context) error {
   235  	if bh.readOnly {
   236  		return fmt.Errorf("EndBackup cannot be called on read-only backup")
   237  	}
   238  	bh.waitGroup.Wait()
   239  	return bh.Error()
   240  }
   241  
   242  // AbortBackup implements BackupHandle.
   243  func (bh *AZBlobBackupHandle) AbortBackup(ctx context.Context) error {
   244  	if bh.readOnly {
   245  		return fmt.Errorf("AbortBackup cannot be called on read-only backup")
   246  	}
   247  	// Cancel the context of any uploads.
   248  	bh.cancel()
   249  
   250  	// Remove the backup
   251  	return bh.bs.RemoveBackup(ctx, bh.dir, bh.name)
   252  }
   253  
   254  // ReadFile implements BackupHandle.
   255  func (bh *AZBlobBackupHandle) ReadFile(ctx context.Context, filename string) (io.ReadCloser, error) {
   256  	if !bh.readOnly {
   257  		return nil, fmt.Errorf("ReadFile cannot be called on read-write backup")
   258  	}
   259  
   260  	obj := objName(bh.dir, filename)
   261  	containerURL, err := bh.bs.containerURL()
   262  	if err != nil {
   263  		return nil, err
   264  	}
   265  	blobURL := containerURL.NewBlobURL(obj)
   266  
   267  	resp, err := blobURL.Download(ctx, 0, azblob.CountToEnd, azblob.BlobAccessConditions{}, false, azblob.ClientProvidedKeyOptions{})
   268  	if err != nil {
   269  		return nil, err
   270  	}
   271  	return resp.Body(azblob.RetryReaderOptions{
   272  		MaxRetryRequests: defaultRetryCount,
   273  		NotifyFailedRead: func(failureCount int, lastError error, offset int64, count int64, willRetry bool) {
   274  			log.Warningf("ReadFile: [azblob] container: %s, directory: %s, filename: %s, error: %v", containerName, objName(bh.dir, ""), filename, lastError)
   275  		},
   276  		TreatEarlyCloseAsError: true,
   277  	}), nil
   278  }
   279  
   280  // AZBlobBackupStorage structs implements the BackupStorage interface for AZBlob
   281  type AZBlobBackupStorage struct {
   282  }
   283  
   284  func (bs *AZBlobBackupStorage) containerURL() (*azblob.ContainerURL, error) {
   285  	credentials, err := azCredentials()
   286  	if err != nil {
   287  		return nil, err
   288  	}
   289  	u := azServiceURL(credentials).NewContainerURL(containerName)
   290  	return &u, nil
   291  }
   292  
   293  // ListBackups implements BackupStorage.
   294  func (bs *AZBlobBackupStorage) ListBackups(ctx context.Context, dir string) ([]backupstorage.BackupHandle, error) {
   295  	var searchPrefix string
   296  	if dir == "/" {
   297  		searchPrefix = "/"
   298  	} else {
   299  		searchPrefix = objName(dir, "")
   300  	}
   301  
   302  	log.Infof("ListBackups: [azblob] container: %s, directory: %v", containerName, searchPrefix)
   303  
   304  	containerURL, err := bs.containerURL()
   305  	if err != nil {
   306  		return nil, err
   307  	}
   308  
   309  	result := make([]backupstorage.BackupHandle, 0)
   310  	var subdirs []string
   311  
   312  	for marker := (azblob.Marker{}); marker.NotDone(); {
   313  		// This returns Blobs in sorted order so we don't need to sort them a second time.
   314  		resp, err := containerURL.ListBlobsHierarchySegment(ctx, marker, delimiter, azblob.ListBlobsSegmentOptions{
   315  			Prefix:     searchPrefix,
   316  			MaxResults: 0,
   317  		})
   318  
   319  		if err != nil {
   320  			return nil, err
   321  		}
   322  
   323  		for _, item := range resp.Segment.BlobPrefixes {
   324  			subdir := strings.TrimPrefix(item.Name, searchPrefix)
   325  			subdir = strings.TrimSuffix(subdir, delimiter)
   326  			subdirs = append(subdirs, subdir)
   327  		}
   328  
   329  		marker = resp.NextMarker
   330  	}
   331  
   332  	for _, subdir := range subdirs {
   333  		cancelableCtx, cancel := context.WithCancel(ctx)
   334  		result = append(result, &AZBlobBackupHandle{
   335  			bs:       bs,
   336  			dir:      strings.Join([]string{dir, subdir}, "/"),
   337  			name:     subdir,
   338  			readOnly: true,
   339  			ctx:      cancelableCtx,
   340  			cancel:   cancel,
   341  		})
   342  	}
   343  
   344  	return result, nil
   345  }
   346  
   347  // StartBackup implements BackupStorage.
   348  func (bs *AZBlobBackupStorage) StartBackup(ctx context.Context, dir, name string) (backupstorage.BackupHandle, error) {
   349  	cancelableCtx, cancel := context.WithCancel(ctx)
   350  	return &AZBlobBackupHandle{
   351  		bs:       bs,
   352  		dir:      dir,
   353  		name:     name,
   354  		readOnly: false,
   355  		ctx:      cancelableCtx,
   356  		cancel:   cancel,
   357  	}, nil
   358  }
   359  
   360  // RemoveBackup implements BackupStorage.
   361  func (bs *AZBlobBackupStorage) RemoveBackup(ctx context.Context, dir, name string) error {
   362  	log.Infof("ListBackups: [azblob] container: %s, directory: %s", containerName, objName(dir, ""))
   363  
   364  	containerURL, err := bs.containerURL()
   365  	if err != nil {
   366  		return err
   367  	}
   368  
   369  	searchPrefix := objName(dir, name, "")
   370  
   371  	for marker := (azblob.Marker{}); marker.NotDone(); {
   372  		resp, err := containerURL.ListBlobsHierarchySegment(ctx, marker, delimiter, azblob.ListBlobsSegmentOptions{
   373  			Prefix:     searchPrefix,
   374  			MaxResults: 0,
   375  		})
   376  
   377  		if err != nil {
   378  			return err
   379  		}
   380  
   381  		// Right now there is no batch delete so we must iterate over all the blobs to delete them one by one
   382  		// One day we will be able to use this https://docs.microsoft.com/en-us/rest/api/storageservices/blob-batch
   383  		// but currently it is listed as a preview and its not in the go API
   384  		for _, item := range resp.Segment.BlobItems {
   385  			_, err := containerURL.NewBlobURL(item.Name).Delete(ctx, azblob.DeleteSnapshotsOptionInclude, azblob.BlobAccessConditions{})
   386  			if err != nil {
   387  				return err
   388  			}
   389  		}
   390  		marker = resp.NextMarker
   391  	}
   392  
   393  	// Delete the blob representing the folder of the backup, remove any trailing slash to signify we want to remove the folder
   394  	// NOTE: you must set DeleteSnapshotsOptionNone or this will error out with a server side error
   395  	for retry := 0; retry < defaultRetryCount; retry = retry + 1 {
   396  		// Since the deletion of blob's is asyncronious we may need to wait a bit before we delete the folder
   397  		// Also refresh the client just for good measure
   398  		time.Sleep(10 * time.Second)
   399  		containerURL, err = bs.containerURL()
   400  		if err != nil {
   401  			return err
   402  		}
   403  
   404  		log.Infof("Removing backup directory: %v", strings.TrimSuffix(searchPrefix, "/"))
   405  		_, err = containerURL.NewBlobURL(strings.TrimSuffix(searchPrefix, "/")).Delete(ctx, azblob.DeleteSnapshotsOptionNone, azblob.BlobAccessConditions{})
   406  		if err == nil {
   407  			break
   408  		}
   409  	}
   410  	return err
   411  }
   412  
   413  // Close implements BackupStorage.
   414  func (bs *AZBlobBackupStorage) Close() error {
   415  	// This function is a No-op
   416  	return nil
   417  }
   418  
   419  // objName joins path parts into an object name.
   420  // Unlike path.Join, it doesn't collapse ".." or strip trailing slashes.
   421  // It also adds the value of the -azblob_backup_storage_root flag if set.
   422  func objName(parts ...string) string {
   423  	if storageRoot != "" {
   424  		return storageRoot + "/" + strings.Join(parts, "/")
   425  	}
   426  	return strings.Join(parts, "/")
   427  }
   428  
   429  func init() {
   430  	backupstorage.BackupStorageMap["azblob"] = &AZBlobBackupStorage{}
   431  }