github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/chunk/client/azure/blob_storage_client.go (about) 1 package azure 2 3 import ( 4 "context" 5 "errors" 6 "flag" 7 "fmt" 8 "io" 9 "net" 10 "net/http" 11 "net/url" 12 "strings" 13 "time" 14 15 "github.com/Azure/azure-pipeline-go/pipeline" 16 "github.com/Azure/azure-storage-blob-go/azblob" 17 "github.com/Azure/go-autorest/autorest/adal" 18 "github.com/Azure/go-autorest/autorest/azure/auth" 19 "github.com/grafana/dskit/flagext" 20 "github.com/mattn/go-ieproxy" 21 "github.com/prometheus/client_golang/prometheus" 22 "github.com/weaveworks/common/instrument" 23 24 "github.com/grafana/loki/pkg/storage/chunk/client" 25 "github.com/grafana/loki/pkg/storage/chunk/client/hedging" 26 client_util "github.com/grafana/loki/pkg/storage/chunk/client/util" 27 "github.com/grafana/loki/pkg/util" 28 "github.com/grafana/loki/pkg/util/log" 29 ) 30 31 const ( 32 // Environment 33 azureGlobal = "AzureGlobal" 34 azureChinaCloud = "AzureChinaCloud" 35 azureGermanCloud = "AzureGermanCloud" 36 azureUSGovernment = "AzureUSGovernment" 37 ) 38 39 var ( 40 supportedEnvironments = []string{azureGlobal, azureChinaCloud, azureGermanCloud, azureUSGovernment} 41 noClientKey = azblob.ClientProvidedKeyOptions{} 42 43 defaultEndpoints = map[string]string{ 44 azureGlobal: "blob.core.windows.net", 45 azureChinaCloud: "blob.core.chinacloudapi.cn", 46 azureGermanCloud: "blob.core.cloudapi.de", 47 azureUSGovernment: "blob.core.usgovcloudapi.net", 48 } 49 50 // default Azure http client. 51 defaultClientFactory = func() *http.Client { 52 return &http.Client{ 53 Transport: &http.Transport{ 54 Proxy: ieproxy.GetProxyFunc(), 55 Dial: (&net.Dialer{ 56 Timeout: 30 * time.Second, 57 KeepAlive: 30 * time.Second, 58 DualStack: true, 59 }).Dial, 60 MaxIdleConns: 200, 61 MaxIdleConnsPerHost: 200, 62 IdleConnTimeout: 90 * time.Second, 63 TLSHandshakeTimeout: 10 * time.Second, 64 ExpectContinueTimeout: 1 * time.Second, 65 DisableKeepAlives: false, 66 DisableCompression: false, 67 MaxResponseHeaderBytes: 0, 68 }, 69 } 70 } 71 ) 72 73 // BlobStorageConfig defines the configurable flags that can be defined when using azure blob storage. 74 type BlobStorageConfig struct { 75 Environment string `yaml:"environment"` 76 StorageAccountName string `yaml:"account_name"` 77 StorageAccountKey flagext.Secret `yaml:"account_key"` 78 ContainerName string `yaml:"container_name"` 79 Endpoint string `yaml:"endpoint_suffix"` 80 UseManagedIdentity bool `yaml:"use_managed_identity"` 81 UserAssignedID string `yaml:"user_assigned_id"` 82 ChunkDelimiter string `yaml:"chunk_delimiter"` 83 DownloadBufferSize int `yaml:"download_buffer_size"` 84 UploadBufferSize int `yaml:"upload_buffer_size"` 85 UploadBufferCount int `yaml:"upload_buffer_count"` 86 RequestTimeout time.Duration `yaml:"request_timeout"` 87 MaxRetries int `yaml:"max_retries"` 88 MinRetryDelay time.Duration `yaml:"min_retry_delay"` 89 MaxRetryDelay time.Duration `yaml:"max_retry_delay"` 90 } 91 92 // RegisterFlags adds the flags required to config this to the given FlagSet 93 func (c *BlobStorageConfig) RegisterFlags(f *flag.FlagSet) { 94 c.RegisterFlagsWithPrefix("", f) 95 } 96 97 // RegisterFlagsWithPrefix adds the flags required to config this to the given FlagSet 98 func (c *BlobStorageConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { 99 f.StringVar(&c.Environment, prefix+"azure.environment", azureGlobal, fmt.Sprintf("Azure Cloud environment. Supported values are: %s.", strings.Join(supportedEnvironments, ", "))) 100 f.StringVar(&c.StorageAccountName, prefix+"azure.account-name", "", "Azure storage account name.") 101 f.Var(&c.StorageAccountKey, prefix+"azure.account-key", "Azure storage account key.") 102 f.StringVar(&c.ContainerName, prefix+"azure.container-name", "loki", "Name of the storage account blob container used to store chunks. This container must be created before running cortex.") 103 f.StringVar(&c.Endpoint, prefix+"azure.endpoint-suffix", "", "Azure storage endpoint suffix without schema. The storage account name will be prefixed to this value to create the FQDN.") 104 f.BoolVar(&c.UseManagedIdentity, prefix+"azure.use-managed-identity", false, "Use Managed Identity to authenticate to the Azure storage account.") 105 f.StringVar(&c.UserAssignedID, prefix+"azure.user-assigned-id", "", "User assigned identity ID to authenticate to the Azure storage account.") 106 f.StringVar(&c.ChunkDelimiter, prefix+"azure.chunk-delimiter", "-", "Chunk delimiter for blob ID to be used") 107 f.DurationVar(&c.RequestTimeout, prefix+"azure.request-timeout", 30*time.Second, "Timeout for requests made against azure blob storage.") 108 f.IntVar(&c.DownloadBufferSize, prefix+"azure.download-buffer-size", 512000, "Preallocated buffer size for downloads.") 109 f.IntVar(&c.UploadBufferSize, prefix+"azure.upload-buffer-size", 256000, "Preallocated buffer size for uploads.") 110 f.IntVar(&c.UploadBufferCount, prefix+"azure.download-buffer-count", 1, "Number of buffers used to used to upload a chunk.") 111 f.IntVar(&c.MaxRetries, prefix+"azure.max-retries", 5, "Number of retries for a request which times out.") 112 f.DurationVar(&c.MinRetryDelay, prefix+"azure.min-retry-delay", 10*time.Millisecond, "Minimum time to wait before retrying a request.") 113 f.DurationVar(&c.MaxRetryDelay, prefix+"azure.max-retry-delay", 500*time.Millisecond, "Maximum time to wait before retrying a request.") 114 } 115 116 type BlobStorageMetrics struct { 117 requestDuration *prometheus.HistogramVec 118 egressBytesTotal prometheus.Counter 119 } 120 121 // NewBlobStorageMetrics creates the blob storage metrics struct and registers all of it's metrics. 122 func NewBlobStorageMetrics() BlobStorageMetrics { 123 b := BlobStorageMetrics{ 124 requestDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ 125 Namespace: "loki", 126 Name: "azure_blob_request_duration_seconds", 127 Help: "Time spent doing azure blob requests.", 128 // Latency seems to range from a few ms to a few secs and is 129 // important. So use 6 buckets from 5ms to 5s. 130 Buckets: prometheus.ExponentialBuckets(0.005, 4, 6), 131 }, []string{"operation", "status_code"}), 132 egressBytesTotal: prometheus.NewCounter(prometheus.CounterOpts{ 133 Namespace: "loki", 134 Name: "azure_blob_egress_bytes_total", 135 Help: "Total bytes downloaded from Azure Blob Storage.", 136 }), 137 } 138 prometheus.MustRegister(b.requestDuration) 139 prometheus.MustRegister(b.egressBytesTotal) 140 return b 141 } 142 143 // Unregister unregisters the blob storage metrics with the prometheus default registerer, useful for tests 144 // where we frequently need to create multiple instances of the metrics struct, but not globally. 145 func (bm *BlobStorageMetrics) Unregister() { 146 prometheus.Unregister(bm.requestDuration) 147 prometheus.Unregister(bm.egressBytesTotal) 148 } 149 150 // BlobStorage is used to interact with azure blob storage for setting or getting time series chunks. 151 // Implements ObjectStorage 152 type BlobStorage struct { 153 // blobService storage.Serv 154 cfg *BlobStorageConfig 155 156 metrics BlobStorageMetrics 157 158 containerURL azblob.ContainerURL 159 160 pipeline pipeline.Pipeline 161 hedgingPipeline pipeline.Pipeline 162 } 163 164 // NewBlobStorage creates a new instance of the BlobStorage struct. 165 func NewBlobStorage(cfg *BlobStorageConfig, metrics BlobStorageMetrics, hedgingCfg hedging.Config) (*BlobStorage, error) { 166 log.WarnExperimentalUse("Azure Blob Storage", log.Logger) 167 blobStorage := &BlobStorage{ 168 cfg: cfg, 169 metrics: metrics, 170 } 171 pipeline, err := blobStorage.newPipeline(hedgingCfg, false) 172 if err != nil { 173 return nil, err 174 } 175 blobStorage.pipeline = pipeline 176 hedgingPipeline, err := blobStorage.newPipeline(hedgingCfg, true) 177 if err != nil { 178 return nil, err 179 } 180 blobStorage.hedgingPipeline = hedgingPipeline 181 blobStorage.containerURL, err = blobStorage.buildContainerURL() 182 if err != nil { 183 return nil, err 184 } 185 186 return blobStorage, nil 187 } 188 189 // Stop is a no op, as there are no background workers with this driver currently 190 func (b *BlobStorage) Stop() {} 191 192 // GetObject returns a reader and the size for the specified object key. 193 func (b *BlobStorage) GetObject(ctx context.Context, objectKey string) (io.ReadCloser, int64, error) { 194 var cancel context.CancelFunc = func() {} 195 if b.cfg.RequestTimeout > 0 { 196 ctx, cancel = context.WithTimeout(ctx, b.cfg.RequestTimeout) 197 } 198 199 var ( 200 size int64 201 rc io.ReadCloser 202 ) 203 err := instrument.CollectedRequest(ctx, "azure.GetObject", instrument.NewHistogramCollector(b.metrics.requestDuration), instrument.ErrorCode, func(ctx context.Context) error { 204 var err error 205 rc, size, err = b.getObject(ctx, objectKey) 206 return err 207 }) 208 b.metrics.egressBytesTotal.Add(float64(size)) 209 if err != nil { 210 // cancel the context if there is an error. 211 cancel() 212 return nil, 0, err 213 } 214 // else return a wrapped ReadCloser which cancels the context while closing the reader. 215 return client_util.NewReadCloserWithContextCancelFunc(rc, cancel), size, nil 216 } 217 218 func (b *BlobStorage) getObject(ctx context.Context, objectKey string) (rc io.ReadCloser, size int64, err error) { 219 blockBlobURL, err := b.getBlobURL(objectKey, true) 220 if err != nil { 221 return nil, 0, err 222 } 223 224 // Request access to the blob 225 downloadResponse, err := blockBlobURL.Download(ctx, 0, azblob.CountToEnd, azblob.BlobAccessConditions{}, false, noClientKey) 226 if err != nil { 227 return nil, 0, err 228 } 229 230 return downloadResponse.Body(azblob.RetryReaderOptions{MaxRetryRequests: b.cfg.MaxRetries}), downloadResponse.ContentLength(), nil 231 } 232 233 func (b *BlobStorage) PutObject(ctx context.Context, objectKey string, object io.ReadSeeker) error { 234 return instrument.CollectedRequest(ctx, "azure.PutObject", instrument.NewHistogramCollector(b.metrics.requestDuration), instrument.ErrorCode, func(ctx context.Context) error { 235 blockBlobURL, err := b.getBlobURL(objectKey, false) 236 if err != nil { 237 return err 238 } 239 240 bufferSize := b.cfg.UploadBufferSize 241 maxBuffers := b.cfg.UploadBufferCount 242 _, err = azblob.UploadStreamToBlockBlob(ctx, object, blockBlobURL, 243 azblob.UploadStreamToBlockBlobOptions{BufferSize: bufferSize, MaxBuffers: maxBuffers}) 244 245 return err 246 }) 247 } 248 249 func (b *BlobStorage) getBlobURL(blobID string, hedging bool) (azblob.BlockBlobURL, error) { 250 blobID = strings.Replace(blobID, ":", b.cfg.ChunkDelimiter, -1) 251 252 // generate url for new chunk blob 253 u, err := url.Parse(fmt.Sprintf(b.selectBlobURLFmt(), b.cfg.StorageAccountName, b.cfg.ContainerName, blobID)) 254 if err != nil { 255 return azblob.BlockBlobURL{}, err 256 } 257 pipeline := b.pipeline 258 if hedging { 259 pipeline = b.hedgingPipeline 260 } 261 262 return azblob.NewBlockBlobURL(*u, pipeline), nil 263 } 264 265 func (b *BlobStorage) buildContainerURL() (azblob.ContainerURL, error) { 266 u, err := url.Parse(fmt.Sprintf(b.selectContainerURLFmt(), b.cfg.StorageAccountName, b.cfg.ContainerName)) 267 if err != nil { 268 return azblob.ContainerURL{}, err 269 } 270 271 return azblob.NewContainerURL(*u, b.pipeline), nil 272 } 273 274 func (b *BlobStorage) newPipeline(hedgingCfg hedging.Config, hedging bool) (pipeline.Pipeline, error) { 275 // defining the Azure Pipeline Options 276 opts := azblob.PipelineOptions{ 277 Retry: azblob.RetryOptions{ 278 Policy: azblob.RetryPolicyExponential, 279 MaxTries: (int32)(b.cfg.MaxRetries), 280 TryTimeout: b.cfg.RequestTimeout, 281 RetryDelay: b.cfg.MinRetryDelay, 282 MaxRetryDelay: b.cfg.MaxRetryDelay, 283 }, 284 } 285 286 client := defaultClientFactory() 287 288 opts.HTTPSender = pipeline.FactoryFunc(func(next pipeline.Policy, po *pipeline.PolicyOptions) pipeline.PolicyFunc { 289 return func(ctx context.Context, request pipeline.Request) (pipeline.Response, error) { 290 resp, err := client.Do(request.WithContext(ctx)) 291 return pipeline.NewHTTPResponse(resp), err 292 } 293 }) 294 295 if hedging { 296 client, err := hedgingCfg.ClientWithRegisterer(client, prometheus.WrapRegistererWithPrefix("loki_", prometheus.DefaultRegisterer)) 297 if err != nil { 298 return nil, err 299 } 300 opts.HTTPSender = pipeline.FactoryFunc(func(next pipeline.Policy, po *pipeline.PolicyOptions) pipeline.PolicyFunc { 301 return func(ctx context.Context, request pipeline.Request) (pipeline.Response, error) { 302 resp, err := client.Do(request.WithContext(ctx)) 303 return pipeline.NewHTTPResponse(resp), err 304 } 305 }) 306 } 307 308 if !b.cfg.UseManagedIdentity && b.cfg.UserAssignedID == "" { 309 credential, err := azblob.NewSharedKeyCredential(b.cfg.StorageAccountName, b.cfg.StorageAccountKey.String()) 310 if err != nil { 311 return nil, err 312 } 313 314 return azblob.NewPipeline(credential, opts), nil 315 } 316 317 tokenCredential, err := b.getOAuthToken() 318 if err != nil { 319 return nil, err 320 } 321 322 return azblob.NewPipeline(*tokenCredential, opts), nil 323 } 324 325 func (b *BlobStorage) getOAuthToken() (*azblob.TokenCredential, error) { 326 spt, err := b.getServicePrincipalToken() 327 if err != nil { 328 return nil, err 329 } 330 331 // Refresh obtains a fresh token 332 err = spt.Refresh() 333 if err != nil { 334 return nil, err 335 } 336 337 tc := azblob.NewTokenCredential(spt.Token().AccessToken, func(tc azblob.TokenCredential) time.Duration { 338 err := spt.Refresh() 339 if err != nil { 340 // something went wrong, prevent the refresher from being triggered again 341 return 0 342 } 343 344 // set the new token value 345 tc.SetToken(spt.Token().AccessToken) 346 347 // get the next token slightly before the current one expires 348 return time.Until(spt.Token().Expires()) - 10*time.Second 349 }) 350 351 return &tc, nil 352 } 353 354 func (b *BlobStorage) getServicePrincipalToken() (*adal.ServicePrincipalToken, error) { 355 var endpoint string 356 if b.cfg.Endpoint != "" { 357 endpoint = b.cfg.Endpoint 358 } else { 359 endpoint = defaultEndpoints[b.cfg.Environment] 360 } 361 362 resource := fmt.Sprintf("https://%s.%s", b.cfg.StorageAccountName, endpoint) 363 364 msiConfig := auth.MSIConfig{ 365 Resource: resource, 366 } 367 368 if b.cfg.UserAssignedID != "" { 369 msiConfig.ClientID = b.cfg.UserAssignedID 370 } 371 372 return msiConfig.ServicePrincipalToken() 373 } 374 375 // List implements chunk.ObjectClient. 376 func (b *BlobStorage) List(ctx context.Context, prefix, delimiter string) ([]client.StorageObject, []client.StorageCommonPrefix, error) { 377 var storageObjects []client.StorageObject 378 var commonPrefixes []client.StorageCommonPrefix 379 380 for marker := (azblob.Marker{}); marker.NotDone(); { 381 if ctx.Err() != nil { 382 return nil, nil, ctx.Err() 383 } 384 385 err := instrument.CollectedRequest(ctx, "azure.List", instrument.NewHistogramCollector(b.metrics.requestDuration), instrument.ErrorCode, func(ctx context.Context) error { 386 listBlob, err := b.containerURL.ListBlobsHierarchySegment(ctx, marker, delimiter, azblob.ListBlobsSegmentOptions{Prefix: prefix}) 387 if err != nil { 388 return err 389 } 390 391 marker = listBlob.NextMarker 392 393 // Process the blobs returned in this result segment (if the segment is empty, the loop body won't execute) 394 for _, blobInfo := range listBlob.Segment.BlobItems { 395 storageObjects = append(storageObjects, client.StorageObject{ 396 Key: blobInfo.Name, 397 ModifiedAt: blobInfo.Properties.LastModified, 398 }) 399 } 400 401 // Process the BlobPrefixes so called commonPrefixes or synthetic directories in the listed synthetic directory 402 for _, blobPrefix := range listBlob.Segment.BlobPrefixes { 403 commonPrefixes = append(commonPrefixes, client.StorageCommonPrefix(blobPrefix.Name)) 404 } 405 406 return nil 407 }) 408 if err != nil { 409 return nil, nil, err 410 } 411 412 } 413 414 return storageObjects, commonPrefixes, nil 415 } 416 417 func (b *BlobStorage) DeleteObject(ctx context.Context, blobID string) error { 418 return instrument.CollectedRequest(ctx, "azure.DeleteObject", instrument.NewHistogramCollector(b.metrics.requestDuration), instrument.ErrorCode, func(ctx context.Context) error { 419 blockBlobURL, err := b.getBlobURL(blobID, false) 420 if err != nil { 421 return err 422 } 423 424 _, err = blockBlobURL.Delete(ctx, azblob.DeleteSnapshotsOptionInclude, azblob.BlobAccessConditions{}) 425 return err 426 }) 427 } 428 429 // Validate the config. 430 func (c *BlobStorageConfig) Validate() error { 431 if !util.StringsContain(supportedEnvironments, c.Environment) { 432 return fmt.Errorf("unsupported Azure blob storage environment: %s, please select one of: %s ", c.Environment, strings.Join(supportedEnvironments, ", ")) 433 } 434 return nil 435 } 436 437 func (b *BlobStorage) selectBlobURLFmt() string { 438 return fmt.Sprintf("https://%%s.%s/%%s/%%s", defaultEndpoints[b.cfg.Environment]) 439 } 440 441 func (b *BlobStorage) selectContainerURLFmt() string { 442 return fmt.Sprintf("https://%%s.%s/%%s", defaultEndpoints[b.cfg.Environment]) 443 } 444 445 // IsObjectNotFoundErr returns true if error means that object is not found. Relevant to GetObject and DeleteObject operations. 446 func (b *BlobStorage) IsObjectNotFoundErr(err error) bool { 447 var e azblob.StorageError 448 if errors.As(err, &e) && e.ServiceCode() == azblob.ServiceCodeBlobNotFound { 449 return true 450 } 451 452 return false 453 }