vitess.io/vitess@v0.16.2/go/vt/mysqlctl/azblobbackupstorage/azblob.go (about) 1 /* 2 Copyright 2020 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // Package azblobbackupstorage implements the BackupStorage interface 18 // for Azure Blob Storage 19 package azblobbackupstorage 20 21 import ( 22 "context" 23 "fmt" 24 "io" 25 "net/url" 26 "os" 27 "strings" 28 "sync" 29 "time" 30 31 "github.com/Azure/azure-pipeline-go/pipeline" 32 "github.com/Azure/azure-storage-blob-go/azblob" 33 "github.com/spf13/pflag" 34 35 "vitess.io/vitess/go/vt/concurrency" 36 "vitess.io/vitess/go/vt/log" 37 "vitess.io/vitess/go/vt/mysqlctl/backupstorage" 38 "vitess.io/vitess/go/vt/servenv" 39 ) 40 41 var ( 42 // This is the account name 43 accountName string 44 45 // This is the private access key 46 accountKeyFile string 47 48 // This is the name of the container that will store the backups 49 containerName string 50 51 // This is an optional prefix to prepend to all files 52 storageRoot string 53 54 azBlobParallelism int 55 ) 56 57 func registerFlags(fs *pflag.FlagSet) { 58 fs.StringVar(&accountName, "azblob_backup_account_name", "", "Azure Storage Account name for backups; if this flag is unset, the environment variable VT_AZBLOB_ACCOUNT_NAME will be used.") 59 fs.StringVar(&accountKeyFile, "azblob_backup_account_key_file", "", "Path to a file containing the Azure Storage account key; if this flag is unset, the environment variable VT_AZBLOB_ACCOUNT_KEY will be used as the key itself (NOT a file path).") 60 fs.StringVar(&containerName, "azblob_backup_container_name", "", "Azure Blob Container Name.") 61 fs.StringVar(&storageRoot, "azblob_backup_storage_root", "", "Root prefix for all backup-related Azure Blobs; this should exclude both initial and trailing '/' (e.g. just 'a/b' not '/a/b/').") 62 fs.IntVar(&azBlobParallelism, "azblob_backup_parallelism", 1, "Azure Blob operation parallelism (requires extra memory when increased).") 63 } 64 65 func init() { 66 servenv.OnParseFor("vtbackup", registerFlags) 67 servenv.OnParseFor("vtctl", registerFlags) 68 servenv.OnParseFor("vtctld", registerFlags) 69 servenv.OnParseFor("vttablet", registerFlags) 70 } 71 72 const ( 73 defaultRetryCount = 5 74 delimiter = "/" 75 ) 76 77 // Return a Shared credential from the available credential sources. 78 // We will use credentials in the following order 79 // 1. Direct Command Line Flag (azblob_backup_account_name, azblob_backup_account_key) 80 // 2. Environment variables 81 func azInternalCredentials() (string, string, error) { 82 actName := accountName 83 if actName == "" { 84 // Check the Environmental Value 85 actName = os.Getenv("VT_AZBLOB_ACCOUNT_NAME") 86 } 87 88 var actKey string 89 if accountKeyFile != "" { 90 log.Infof("Getting Azure Storage Account key from file: %s", accountKeyFile) 91 dat, err := os.ReadFile(accountKeyFile) 92 if err != nil { 93 return "", "", err 94 } 95 actKey = string(dat) 96 } else { 97 actKey = os.Getenv("VT_AZBLOB_ACCOUNT_KEY") 98 } 99 100 if actName == "" || actKey == "" { 101 return "", "", fmt.Errorf("Azure Storage Account credentials not found in command-line flags or environment variables") 102 } 103 return actName, actKey, nil 104 } 105 106 func azCredentials() (*azblob.SharedKeyCredential, error) { 107 actName, actKey, err := azInternalCredentials() 108 if err != nil { 109 return nil, err 110 } 111 return azblob.NewSharedKeyCredential(actName, actKey) 112 } 113 114 func azServiceURL(credentials *azblob.SharedKeyCredential) azblob.ServiceURL { 115 pipeline := azblob.NewPipeline(credentials, azblob.PipelineOptions{ 116 Retry: azblob.RetryOptions{ 117 Policy: azblob.RetryPolicyFixed, 118 MaxTries: defaultRetryCount, 119 // Per https://godoc.org/github.com/Azure/azure-storage-blob-go/azblob#RetryOptions 120 // this should be set to a very nigh number (they claim 60s per MB). 121 // That could end up being days so we are limiting this to four hours. 122 TryTimeout: 4 * time.Hour, 123 }, 124 Log: pipeline.LogOptions{ 125 Log: func(level pipeline.LogLevel, message string) { 126 switch level { 127 case pipeline.LogFatal, pipeline.LogPanic: 128 log.Fatal(message) 129 case pipeline.LogError: 130 log.Error(message) 131 case pipeline.LogWarning: 132 log.Warning(message) 133 case pipeline.LogInfo, pipeline.LogDebug: 134 log.Info(message) 135 } 136 }, 137 ShouldLog: func(level pipeline.LogLevel) bool { 138 switch level { 139 case pipeline.LogFatal, pipeline.LogPanic: 140 return bool(log.V(3)) 141 case pipeline.LogError: 142 return bool(log.V(3)) 143 case pipeline.LogWarning: 144 return bool(log.V(2)) 145 case pipeline.LogInfo, pipeline.LogDebug: 146 return bool(log.V(1)) 147 } 148 return false 149 }, 150 }, 151 }) 152 u := url.URL{ 153 Scheme: "https", 154 Host: credentials.AccountName() + ".blob.core.windows.net", 155 Path: "/", 156 } 157 return azblob.NewServiceURL(u, pipeline) 158 } 159 160 // AZBlobBackupHandle implements BackupHandle for Azure Blob service. 161 type AZBlobBackupHandle struct { 162 bs *AZBlobBackupStorage 163 dir string 164 name string 165 readOnly bool 166 waitGroup sync.WaitGroup 167 errors concurrency.AllErrorRecorder 168 ctx context.Context 169 cancel context.CancelFunc 170 } 171 172 // Directory implements BackupHandle. 173 func (bh *AZBlobBackupHandle) Directory() string { 174 return bh.dir 175 } 176 177 // Name implements BackupHandle. 178 func (bh *AZBlobBackupHandle) Name() string { 179 return bh.name 180 } 181 182 // RecordError is part of the concurrency.ErrorRecorder interface. 183 func (bh *AZBlobBackupHandle) RecordError(err error) { 184 bh.errors.RecordError(err) 185 } 186 187 // HasErrors is part of the concurrency.ErrorRecorder interface. 188 func (bh *AZBlobBackupHandle) HasErrors() bool { 189 return bh.errors.HasErrors() 190 } 191 192 // Error is part of the concurrency.ErrorRecorder interface. 193 func (bh *AZBlobBackupHandle) Error() error { 194 return bh.errors.Error() 195 } 196 197 // AddFile implements BackupHandle. 198 func (bh *AZBlobBackupHandle) AddFile(ctx context.Context, filename string, filesize int64) (io.WriteCloser, error) { 199 if bh.readOnly { 200 return nil, fmt.Errorf("AddFile cannot be called on read-only backup") 201 } 202 // Error out if the file size it too large ( ~4.75 TB) 203 if filesize > azblob.BlockBlobMaxStageBlockBytes*azblob.BlockBlobMaxBlocks { 204 return nil, fmt.Errorf("filesize (%v) is too large to upload to az blob (max size %v)", filesize, azblob.BlockBlobMaxStageBlockBytes*azblob.BlockBlobMaxBlocks) 205 } 206 207 obj := objName(bh.dir, bh.name, filename) 208 containerURL, err := bh.bs.containerURL() 209 if err != nil { 210 return nil, err 211 } 212 213 blockBlobURL := containerURL.NewBlockBlobURL(obj) 214 215 reader, writer := io.Pipe() 216 bh.waitGroup.Add(1) 217 218 go func() { 219 defer bh.waitGroup.Done() 220 _, err := azblob.UploadStreamToBlockBlob(bh.ctx, reader, blockBlobURL, azblob.UploadStreamToBlockBlobOptions{ 221 BufferSize: azblob.BlockBlobMaxStageBlockBytes, 222 MaxBuffers: azBlobParallelism, 223 }) 224 if err != nil { 225 reader.CloseWithError(err) 226 bh.RecordError(err) 227 } 228 }() 229 230 return writer, nil 231 } 232 233 // EndBackup implements BackupHandle. 234 func (bh *AZBlobBackupHandle) EndBackup(ctx context.Context) error { 235 if bh.readOnly { 236 return fmt.Errorf("EndBackup cannot be called on read-only backup") 237 } 238 bh.waitGroup.Wait() 239 return bh.Error() 240 } 241 242 // AbortBackup implements BackupHandle. 243 func (bh *AZBlobBackupHandle) AbortBackup(ctx context.Context) error { 244 if bh.readOnly { 245 return fmt.Errorf("AbortBackup cannot be called on read-only backup") 246 } 247 // Cancel the context of any uploads. 248 bh.cancel() 249 250 // Remove the backup 251 return bh.bs.RemoveBackup(ctx, bh.dir, bh.name) 252 } 253 254 // ReadFile implements BackupHandle. 255 func (bh *AZBlobBackupHandle) ReadFile(ctx context.Context, filename string) (io.ReadCloser, error) { 256 if !bh.readOnly { 257 return nil, fmt.Errorf("ReadFile cannot be called on read-write backup") 258 } 259 260 obj := objName(bh.dir, filename) 261 containerURL, err := bh.bs.containerURL() 262 if err != nil { 263 return nil, err 264 } 265 blobURL := containerURL.NewBlobURL(obj) 266 267 resp, err := blobURL.Download(ctx, 0, azblob.CountToEnd, azblob.BlobAccessConditions{}, false, azblob.ClientProvidedKeyOptions{}) 268 if err != nil { 269 return nil, err 270 } 271 return resp.Body(azblob.RetryReaderOptions{ 272 MaxRetryRequests: defaultRetryCount, 273 NotifyFailedRead: func(failureCount int, lastError error, offset int64, count int64, willRetry bool) { 274 log.Warningf("ReadFile: [azblob] container: %s, directory: %s, filename: %s, error: %v", containerName, objName(bh.dir, ""), filename, lastError) 275 }, 276 TreatEarlyCloseAsError: true, 277 }), nil 278 } 279 280 // AZBlobBackupStorage structs implements the BackupStorage interface for AZBlob 281 type AZBlobBackupStorage struct { 282 } 283 284 func (bs *AZBlobBackupStorage) containerURL() (*azblob.ContainerURL, error) { 285 credentials, err := azCredentials() 286 if err != nil { 287 return nil, err 288 } 289 u := azServiceURL(credentials).NewContainerURL(containerName) 290 return &u, nil 291 } 292 293 // ListBackups implements BackupStorage. 294 func (bs *AZBlobBackupStorage) ListBackups(ctx context.Context, dir string) ([]backupstorage.BackupHandle, error) { 295 var searchPrefix string 296 if dir == "/" { 297 searchPrefix = "/" 298 } else { 299 searchPrefix = objName(dir, "") 300 } 301 302 log.Infof("ListBackups: [azblob] container: %s, directory: %v", containerName, searchPrefix) 303 304 containerURL, err := bs.containerURL() 305 if err != nil { 306 return nil, err 307 } 308 309 result := make([]backupstorage.BackupHandle, 0) 310 var subdirs []string 311 312 for marker := (azblob.Marker{}); marker.NotDone(); { 313 // This returns Blobs in sorted order so we don't need to sort them a second time. 314 resp, err := containerURL.ListBlobsHierarchySegment(ctx, marker, delimiter, azblob.ListBlobsSegmentOptions{ 315 Prefix: searchPrefix, 316 MaxResults: 0, 317 }) 318 319 if err != nil { 320 return nil, err 321 } 322 323 for _, item := range resp.Segment.BlobPrefixes { 324 subdir := strings.TrimPrefix(item.Name, searchPrefix) 325 subdir = strings.TrimSuffix(subdir, delimiter) 326 subdirs = append(subdirs, subdir) 327 } 328 329 marker = resp.NextMarker 330 } 331 332 for _, subdir := range subdirs { 333 cancelableCtx, cancel := context.WithCancel(ctx) 334 result = append(result, &AZBlobBackupHandle{ 335 bs: bs, 336 dir: strings.Join([]string{dir, subdir}, "/"), 337 name: subdir, 338 readOnly: true, 339 ctx: cancelableCtx, 340 cancel: cancel, 341 }) 342 } 343 344 return result, nil 345 } 346 347 // StartBackup implements BackupStorage. 348 func (bs *AZBlobBackupStorage) StartBackup(ctx context.Context, dir, name string) (backupstorage.BackupHandle, error) { 349 cancelableCtx, cancel := context.WithCancel(ctx) 350 return &AZBlobBackupHandle{ 351 bs: bs, 352 dir: dir, 353 name: name, 354 readOnly: false, 355 ctx: cancelableCtx, 356 cancel: cancel, 357 }, nil 358 } 359 360 // RemoveBackup implements BackupStorage. 361 func (bs *AZBlobBackupStorage) RemoveBackup(ctx context.Context, dir, name string) error { 362 log.Infof("ListBackups: [azblob] container: %s, directory: %s", containerName, objName(dir, "")) 363 364 containerURL, err := bs.containerURL() 365 if err != nil { 366 return err 367 } 368 369 searchPrefix := objName(dir, name, "") 370 371 for marker := (azblob.Marker{}); marker.NotDone(); { 372 resp, err := containerURL.ListBlobsHierarchySegment(ctx, marker, delimiter, azblob.ListBlobsSegmentOptions{ 373 Prefix: searchPrefix, 374 MaxResults: 0, 375 }) 376 377 if err != nil { 378 return err 379 } 380 381 // Right now there is no batch delete so we must iterate over all the blobs to delete them one by one 382 // One day we will be able to use this https://docs.microsoft.com/en-us/rest/api/storageservices/blob-batch 383 // but currently it is listed as a preview and its not in the go API 384 for _, item := range resp.Segment.BlobItems { 385 _, err := containerURL.NewBlobURL(item.Name).Delete(ctx, azblob.DeleteSnapshotsOptionInclude, azblob.BlobAccessConditions{}) 386 if err != nil { 387 return err 388 } 389 } 390 marker = resp.NextMarker 391 } 392 393 // Delete the blob representing the folder of the backup, remove any trailing slash to signify we want to remove the folder 394 // NOTE: you must set DeleteSnapshotsOptionNone or this will error out with a server side error 395 for retry := 0; retry < defaultRetryCount; retry = retry + 1 { 396 // Since the deletion of blob's is asyncronious we may need to wait a bit before we delete the folder 397 // Also refresh the client just for good measure 398 time.Sleep(10 * time.Second) 399 containerURL, err = bs.containerURL() 400 if err != nil { 401 return err 402 } 403 404 log.Infof("Removing backup directory: %v", strings.TrimSuffix(searchPrefix, "/")) 405 _, err = containerURL.NewBlobURL(strings.TrimSuffix(searchPrefix, "/")).Delete(ctx, azblob.DeleteSnapshotsOptionNone, azblob.BlobAccessConditions{}) 406 if err == nil { 407 break 408 } 409 } 410 return err 411 } 412 413 // Close implements BackupStorage. 414 func (bs *AZBlobBackupStorage) Close() error { 415 // This function is a No-op 416 return nil 417 } 418 419 // objName joins path parts into an object name. 420 // Unlike path.Join, it doesn't collapse ".." or strip trailing slashes. 421 // It also adds the value of the -azblob_backup_storage_root flag if set. 422 func objName(parts ...string) string { 423 if storageRoot != "" { 424 return storageRoot + "/" + strings.Join(parts, "/") 425 } 426 return strings.Join(parts, "/") 427 } 428 429 func init() { 430 backupstorage.BackupStorageMap["azblob"] = &AZBlobBackupStorage{} 431 }