github.com/sequix/cortex@v1.1.6/pkg/chunk/aws/dynamodb_storage_client.go (about)

     1  package aws
     2  
     3  import (
     4  	"context"
     5  	"flag"
     6  	"fmt"
     7  	"net/url"
     8  	"strings"
     9  	"time"
    10  
    11  	"github.com/go-kit/kit/log/level"
    12  	ot "github.com/opentracing/opentracing-go"
    13  	"golang.org/x/time/rate"
    14  
    15  	"github.com/aws/aws-sdk-go/aws"
    16  	"github.com/aws/aws-sdk-go/aws/awserr"
    17  	"github.com/aws/aws-sdk-go/aws/client"
    18  	"github.com/aws/aws-sdk-go/aws/request"
    19  	"github.com/aws/aws-sdk-go/aws/session"
    20  	"github.com/aws/aws-sdk-go/service/dynamodb"
    21  	"github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface"
    22  	"github.com/prometheus/client_golang/prometheus"
    23  
    24  	"github.com/sequix/cortex/pkg/chunk"
    25  	chunk_util "github.com/sequix/cortex/pkg/chunk/util"
    26  	"github.com/sequix/cortex/pkg/util"
    27  	"github.com/sequix/cortex/pkg/util/flagext"
    28  	"github.com/sequix/cortex/pkg/util/spanlogger"
    29  	awscommon "github.com/weaveworks/common/aws"
    30  	"github.com/weaveworks/common/instrument"
    31  	"github.com/weaveworks/common/user"
    32  )
    33  
    34  const (
    35  	hashKey  = "h"
    36  	rangeKey = "r"
    37  	valueKey = "c"
    38  
    39  	// For dynamodb errors
    40  	tableNameLabel   = "table"
    41  	errorReasonLabel = "error"
    42  	otherError       = "other"
    43  
    44  	// See http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html.
    45  	dynamoDBMaxWriteBatchSize = 25
    46  	dynamoDBMaxReadBatchSize  = 100
    47  	validationException       = "ValidationException"
    48  )
    49  
    50  var (
    51  	dynamoRequestDuration = instrument.NewHistogramCollector(prometheus.NewHistogramVec(prometheus.HistogramOpts{
    52  		Namespace: "cortex",
    53  		Name:      "dynamo_request_duration_seconds",
    54  		Help:      "Time spent doing DynamoDB requests.",
    55  
    56  		// DynamoDB latency seems to range from a few ms to a few sec and is
    57  		// important.  So use 8 buckets from 128us to 2s.
    58  		Buckets: prometheus.ExponentialBuckets(0.000128, 4, 8),
    59  	}, []string{"operation", "status_code"}))
    60  	dynamoConsumedCapacity = prometheus.NewCounterVec(prometheus.CounterOpts{
    61  		Namespace: "cortex",
    62  		Name:      "dynamo_consumed_capacity_total",
    63  		Help:      "The capacity units consumed by operation.",
    64  	}, []string{"operation", tableNameLabel})
    65  	dynamoThrottled = prometheus.NewCounterVec(prometheus.CounterOpts{
    66  		Namespace: "cortex",
    67  		Name:      "dynamo_throttled_total",
    68  		Help:      "The total number of throttled events.",
    69  	}, []string{"operation", tableNameLabel})
    70  	dynamoFailures = prometheus.NewCounterVec(prometheus.CounterOpts{
    71  		Namespace: "cortex",
    72  		Name:      "dynamo_failures_total",
    73  		Help:      "The total number of errors while storing chunks to the chunk store.",
    74  	}, []string{tableNameLabel, errorReasonLabel, "operation"})
    75  	dynamoDroppedRequests = prometheus.NewCounterVec(prometheus.CounterOpts{
    76  		Namespace: "cortex",
    77  		Name:      "dynamo_dropped_requests_total",
    78  		Help:      "The total number of requests which were dropped due to errors encountered from dynamo.",
    79  	}, []string{tableNameLabel, errorReasonLabel, "operation"})
    80  	dynamoQueryPagesCount = prometheus.NewHistogram(prometheus.HistogramOpts{
    81  		Namespace: "cortex",
    82  		Name:      "dynamo_query_pages_count",
    83  		Help:      "Number of pages per query.",
    84  		// Most queries will have one page, however this may increase with fuzzy
    85  		// metric names.
    86  		Buckets: prometheus.ExponentialBuckets(1, 4, 6),
    87  	})
    88  )
    89  
    90  func init() {
    91  	dynamoRequestDuration.Register()
    92  	prometheus.MustRegister(dynamoConsumedCapacity)
    93  	prometheus.MustRegister(dynamoThrottled)
    94  	prometheus.MustRegister(dynamoFailures)
    95  	prometheus.MustRegister(dynamoQueryPagesCount)
    96  	prometheus.MustRegister(dynamoDroppedRequests)
    97  }
    98  
// DynamoDBConfig specifies config for a DynamoDB database.
// The exported fields are populated from command-line flags by RegisterFlags.
type DynamoDBConfig struct {
	// DynamoDB is the endpoint URL (flag dynamodb.url).
	DynamoDB flagext.URLValue
	// APILimit is the table management requests per second limit (flag dynamodb.api-limit).
	APILimit float64
	// ThrottleLimit is the rate cap to back off to when throttled (flag dynamodb.throttle-limit).
	ThrottleLimit float64
	// ApplicationAutoScaling is the ApplicationAutoscaling endpoint URL (flag applicationautoscaling.url).
	ApplicationAutoScaling flagext.URLValue
	// Metrics registers its own flags via Metrics.RegisterFlags.
	Metrics MetricsAutoScalingConfig
	// ChunkGangSize is the number of chunks grouped together to parallelise fetches; zero disables it.
	ChunkGangSize int
	// ChunkGetMaxParallelism is the max number of chunk-get operations to start in parallel.
	ChunkGetMaxParallelism int
	// backoffConfig holds the min/max backoff and max retries used by the retry loops in this file.
	backoffConfig util.BackoffConfig
}
   110  
   111  // RegisterFlags adds the flags required to config this to the given FlagSet
   112  func (cfg *DynamoDBConfig) RegisterFlags(f *flag.FlagSet) {
   113  	f.Var(&cfg.DynamoDB, "dynamodb.url", "DynamoDB endpoint URL with escaped Key and Secret encoded. "+
   114  		"If only region is specified as a host, proper endpoint will be deduced. Use inmemory:///<table-name> to use a mock in-memory implementation.")
   115  	f.Float64Var(&cfg.APILimit, "dynamodb.api-limit", 2.0, "DynamoDB table management requests per second limit.")
   116  	f.Float64Var(&cfg.ThrottleLimit, "dynamodb.throttle-limit", 10.0, "DynamoDB rate cap to back off when throttled.")
   117  	f.Var(&cfg.ApplicationAutoScaling, "applicationautoscaling.url", "ApplicationAutoscaling endpoint URL with escaped Key and Secret encoded.")
   118  	f.IntVar(&cfg.ChunkGangSize, "dynamodb.chunk.gang.size", 10, "Number of chunks to group together to parallelise fetches (zero to disable)")
   119  	f.IntVar(&cfg.ChunkGetMaxParallelism, "dynamodb.chunk.get.max.parallelism", 32, "Max number of chunk-get operations to start in parallel")
   120  	f.DurationVar(&cfg.backoffConfig.MinBackoff, "dynamodb.min-backoff", 100*time.Millisecond, "Minimum backoff time")
   121  	f.DurationVar(&cfg.backoffConfig.MaxBackoff, "dynamodb.max-backoff", 50*time.Second, "Maximum backoff time")
   122  	f.IntVar(&cfg.backoffConfig.MaxRetries, "dynamodb.max-retries", 20, "Maximum number of times to retry an operation")
   123  	cfg.Metrics.RegisterFlags(f)
   124  }
   125  
// StorageConfig specifies config for storing data on AWS.
// It embeds DynamoDBConfig and adds the S3 settings.
type StorageConfig struct {
	DynamoDBConfig
	// S3 is the S3 endpoint URL (flag s3.url).
	S3 flagext.URLValue
	// S3ForcePathStyle forces path-style addressing for requests (flag s3.force-path-style).
	S3ForcePathStyle bool
}
   132  
   133  // RegisterFlags adds the flags required to config this to the given FlagSet
   134  func (cfg *StorageConfig) RegisterFlags(f *flag.FlagSet) {
   135  	cfg.DynamoDBConfig.RegisterFlags(f)
   136  
   137  	f.Var(&cfg.S3, "s3.url", "S3 endpoint URL with escaped Key and Secret encoded. "+
   138  		"If only region is specified as a host, proper endpoint will be deduced. Use inmemory:///<bucket-name> to use a mock in-memory implementation.")
   139  	f.BoolVar(&cfg.S3ForcePathStyle, "s3.force-path-style", false, "Set this to `true` to force the request to use path-style addressing.")
   140  }
   141  
// dynamoDBStorageClient is the DynamoDB-backed client returned by both
// NewDynamoDBIndexClient and NewDynamoDBObjectClient.
type dynamoDBStorageClient struct {
	cfg       DynamoDBConfig
	schemaCfg chunk.SchemaConfig

	DynamoDB dynamodbiface.DynamoDBAPI
	// This rate-limiter lets us slow down writes when DynamoDB signals provision limits.
	writeThrottle *rate.Limiter

	// These functions exist for mocking, so we don't have to write a whole load
	// of boilerplate.
	queryRequestFn          func(ctx context.Context, input *dynamodb.QueryInput) dynamoDBRequest
	batchGetItemRequestFn   func(ctx context.Context, input *dynamodb.BatchGetItemInput) dynamoDBRequest
	batchWriteItemRequestFn func(ctx context.Context, input *dynamodb.BatchWriteItemInput) dynamoDBRequest
}
   156  
   157  // NewDynamoDBIndexClient makes a new DynamoDB-backed IndexClient.
   158  func NewDynamoDBIndexClient(cfg DynamoDBConfig, schemaCfg chunk.SchemaConfig) (chunk.IndexClient, error) {
   159  	return newDynamoDBStorageClient(cfg, schemaCfg)
   160  }
   161  
   162  // NewDynamoDBObjectClient makes a new DynamoDB-backed ObjectClient.
   163  func NewDynamoDBObjectClient(cfg DynamoDBConfig, schemaCfg chunk.SchemaConfig) (chunk.ObjectClient, error) {
   164  	return newDynamoDBStorageClient(cfg, schemaCfg)
   165  }
   166  
   167  // newDynamoDBStorageClient makes a new DynamoDB-backed IndexClient and ObjectClient.
   168  func newDynamoDBStorageClient(cfg DynamoDBConfig, schemaCfg chunk.SchemaConfig) (*dynamoDBStorageClient, error) {
   169  	dynamoDB, err := dynamoClientFromURL(cfg.DynamoDB.URL)
   170  	if err != nil {
   171  		return nil, err
   172  	}
   173  
   174  	client := &dynamoDBStorageClient{
   175  		cfg:           cfg,
   176  		schemaCfg:     schemaCfg,
   177  		DynamoDB:      dynamoDB,
   178  		writeThrottle: rate.NewLimiter(rate.Limit(cfg.ThrottleLimit), dynamoDBMaxWriteBatchSize),
   179  	}
   180  	client.queryRequestFn = client.queryRequest
   181  	client.batchGetItemRequestFn = client.batchGetItemRequest
   182  	client.batchWriteItemRequestFn = client.batchWriteItemRequest
   183  	return client, nil
   184  }
   185  
// Stop implements chunk.IndexClient.
// It is a no-op: the method body is empty.
func (a dynamoDBStorageClient) Stop() {
}
   189  
   190  // NewWriteBatch implements chunk.IndexClient.
   191  func (a dynamoDBStorageClient) NewWriteBatch() chunk.WriteBatch {
   192  	return dynamoDBWriteBatch(map[string][]*dynamodb.WriteRequest{})
   193  }
   194  
   195  func logWriteRetry(ctx context.Context, unprocessed dynamoDBWriteBatch) {
   196  	userID, _ := user.ExtractOrgID(ctx)
   197  	for table, reqs := range unprocessed {
   198  		dynamoThrottled.WithLabelValues("DynamoDB.BatchWriteItem", table).Add(float64(len(reqs)))
   199  		for _, req := range reqs {
   200  			item := req.PutRequest.Item
   201  			var hash, rnge string
   202  			if hashAttr, ok := item[hashKey]; ok {
   203  				if hashAttr.S != nil {
   204  					hash = *hashAttr.S
   205  				}
   206  			}
   207  			if rangeAttr, ok := item[rangeKey]; ok {
   208  				rnge = string(rangeAttr.B)
   209  			}
   210  			util.Event().Log("msg", "store retry", "table", table, "userID", userID, "hashKey", hash, "rangeKey", rnge)
   211  		}
   212  	}
   213  }
   214  
   215  // BatchWrite writes requests to the underlying storage, handling retries and backoff.
   216  // Structure is identical to getDynamoDBChunks(), but operating on different datatypes
   217  // so cannot share implementation.  If you fix a bug here fix it there too.
   218  func (a dynamoDBStorageClient) BatchWrite(ctx context.Context, input chunk.WriteBatch) error {
   219  	outstanding := input.(dynamoDBWriteBatch)
   220  	unprocessed := dynamoDBWriteBatch{}
   221  
   222  	backoff := util.NewBackoff(ctx, a.cfg.backoffConfig)
   223  
   224  	for outstanding.Len()+unprocessed.Len() > 0 && backoff.Ongoing() {
   225  		requests := dynamoDBWriteBatch{}
   226  		requests.TakeReqs(outstanding, dynamoDBMaxWriteBatchSize)
   227  		requests.TakeReqs(unprocessed, dynamoDBMaxWriteBatchSize)
   228  
   229  		request := a.batchWriteItemRequestFn(ctx, &dynamodb.BatchWriteItemInput{
   230  			RequestItems:           requests,
   231  			ReturnConsumedCapacity: aws.String(dynamodb.ReturnConsumedCapacityTotal),
   232  		})
   233  
   234  		err := instrument.CollectedRequest(ctx, "DynamoDB.BatchWriteItem", dynamoRequestDuration, instrument.ErrorCode, func(ctx context.Context) error {
   235  			return request.Send()
   236  		})
   237  		resp := request.Data().(*dynamodb.BatchWriteItemOutput)
   238  
   239  		for _, cc := range resp.ConsumedCapacity {
   240  			dynamoConsumedCapacity.WithLabelValues("DynamoDB.BatchWriteItem", *cc.TableName).
   241  				Add(float64(*cc.CapacityUnits))
   242  		}
   243  
   244  		if err != nil {
   245  			for tableName := range requests {
   246  				recordDynamoError(tableName, err, "DynamoDB.BatchWriteItem")
   247  			}
   248  
   249  			// If we get provisionedThroughputExceededException, then no items were processed,
   250  			// so back off and retry all.
   251  			if awsErr, ok := err.(awserr.Error); ok && ((awsErr.Code() == dynamodb.ErrCodeProvisionedThroughputExceededException) || request.Retryable()) {
   252  				logWriteRetry(ctx, requests)
   253  				unprocessed.TakeReqs(requests, -1)
   254  				a.writeThrottle.WaitN(ctx, len(requests))
   255  				backoff.Wait()
   256  				continue
   257  			} else if ok && awsErr.Code() == validationException {
   258  				// this write will never work, so the only option is to drop the offending items and continue.
   259  				level.Warn(util.Logger).Log("msg", "Data lost while flushing to Dynamo", "err", awsErr)
   260  				level.Debug(util.Logger).Log("msg", "Dropped request details", "requests", requests)
   261  				util.Event().Log("msg", "ValidationException", "requests", requests)
   262  				// recording the drop counter separately from recordDynamoError(), as the error code alone may not provide enough context
   263  				// to determine if a request was dropped (or not)
   264  				for tableName := range requests {
   265  					dynamoDroppedRequests.WithLabelValues(tableName, validationException, "DynamoDB.BatchWriteItem").Inc()
   266  				}
   267  				continue
   268  			}
   269  
   270  			// All other errors are critical.
   271  			return err
   272  		}
   273  
   274  		// If there are unprocessed items, retry those items.
   275  		unprocessedItems := dynamoDBWriteBatch(resp.UnprocessedItems)
   276  		if len(unprocessedItems) > 0 {
   277  			logWriteRetry(ctx, unprocessedItems)
   278  			a.writeThrottle.WaitN(ctx, unprocessedItems.Len())
   279  			unprocessed.TakeReqs(unprocessedItems, -1)
   280  		}
   281  
   282  		backoff.Reset()
   283  	}
   284  
   285  	if valuesLeft := outstanding.Len() + unprocessed.Len(); valuesLeft > 0 {
   286  		return fmt.Errorf("failed to write chunk, %d values remaining: %s", valuesLeft, backoff.Err())
   287  	}
   288  	return backoff.Err()
   289  }
   290  
   291  // QueryPages implements chunk.IndexClient.
   292  func (a dynamoDBStorageClient) QueryPages(ctx context.Context, queries []chunk.IndexQuery, callback func(chunk.IndexQuery, chunk.ReadBatch) bool) error {
   293  	return chunk_util.DoParallelQueries(ctx, a.query, queries, callback)
   294  }
   295  
   296  func (a dynamoDBStorageClient) query(ctx context.Context, query chunk.IndexQuery, callback func(result chunk.ReadBatch) (shouldContinue bool)) error {
   297  	input := &dynamodb.QueryInput{
   298  		TableName: aws.String(query.TableName),
   299  		KeyConditions: map[string]*dynamodb.Condition{
   300  			hashKey: {
   301  				AttributeValueList: []*dynamodb.AttributeValue{
   302  					{S: aws.String(query.HashValue)},
   303  				},
   304  				ComparisonOperator: aws.String(dynamodb.ComparisonOperatorEq),
   305  			},
   306  		},
   307  		ReturnConsumedCapacity: aws.String(dynamodb.ReturnConsumedCapacityTotal),
   308  	}
   309  
   310  	if query.RangeValuePrefix != nil {
   311  		input.KeyConditions[rangeKey] = &dynamodb.Condition{
   312  			AttributeValueList: []*dynamodb.AttributeValue{
   313  				{B: query.RangeValuePrefix},
   314  			},
   315  			ComparisonOperator: aws.String(dynamodb.ComparisonOperatorBeginsWith),
   316  		}
   317  	} else if query.RangeValueStart != nil {
   318  		input.KeyConditions[rangeKey] = &dynamodb.Condition{
   319  			AttributeValueList: []*dynamodb.AttributeValue{
   320  				{B: query.RangeValueStart},
   321  			},
   322  			ComparisonOperator: aws.String(dynamodb.ComparisonOperatorGe),
   323  		}
   324  	}
   325  
   326  	// Filters
   327  	if query.ValueEqual != nil {
   328  		input.FilterExpression = aws.String(fmt.Sprintf("%s = :v", valueKey))
   329  		input.ExpressionAttributeValues = map[string]*dynamodb.AttributeValue{
   330  			":v": {
   331  				B: query.ValueEqual,
   332  			},
   333  		}
   334  	}
   335  
   336  	request := a.queryRequestFn(ctx, input)
   337  	pageCount := 0
   338  	defer func() {
   339  		dynamoQueryPagesCount.Observe(float64(pageCount))
   340  	}()
   341  
   342  	for page := request; page != nil; page = page.NextPage() {
   343  		pageCount++
   344  
   345  		response, err := a.queryPage(ctx, input, page, query.HashValue, pageCount)
   346  		if err != nil {
   347  			return err
   348  		}
   349  
   350  		if !callback(response) {
   351  			if err != nil {
   352  				return fmt.Errorf("QueryPages error: table=%v, err=%v", *input.TableName, page.Error())
   353  			}
   354  			return nil
   355  		}
   356  		if !page.HasNextPage() {
   357  			return nil
   358  		}
   359  	}
   360  	return nil
   361  }
   362  
   363  func (a dynamoDBStorageClient) queryPage(ctx context.Context, input *dynamodb.QueryInput, page dynamoDBRequest, hashValue string, pageCount int) (*dynamoDBReadResponse, error) {
   364  	backoff := util.NewBackoff(ctx, a.cfg.backoffConfig)
   365  
   366  	var err error
   367  	for backoff.Ongoing() {
   368  		err = instrument.CollectedRequest(ctx, "DynamoDB.QueryPages", dynamoRequestDuration, instrument.ErrorCode, func(innerCtx context.Context) error {
   369  			if sp := ot.SpanFromContext(innerCtx); sp != nil {
   370  				sp.SetTag("tableName", aws.StringValue(input.TableName))
   371  				sp.SetTag("hashValue", hashValue)
   372  				sp.SetTag("page", pageCount)
   373  				sp.SetTag("retry", backoff.NumRetries())
   374  			}
   375  			return page.Send()
   376  		})
   377  
   378  		if cc := page.Data().(*dynamodb.QueryOutput).ConsumedCapacity; cc != nil {
   379  			dynamoConsumedCapacity.WithLabelValues("DynamoDB.QueryPages", *cc.TableName).
   380  				Add(float64(*cc.CapacityUnits))
   381  		}
   382  
   383  		if err != nil {
   384  			recordDynamoError(*input.TableName, err, "DynamoDB.QueryPages")
   385  			if awsErr, ok := err.(awserr.Error); ok && ((awsErr.Code() == dynamodb.ErrCodeProvisionedThroughputExceededException) || page.Retryable()) {
   386  				if awsErr.Code() != dynamodb.ErrCodeProvisionedThroughputExceededException {
   387  					level.Warn(util.Logger).Log("msg", "DynamoDB error", "retry", backoff.NumRetries(), "table", *input.TableName, "err", err)
   388  				}
   389  				backoff.Wait()
   390  				continue
   391  			}
   392  			return nil, fmt.Errorf("QueryPage error: table=%v, err=%v", *input.TableName, err)
   393  		}
   394  
   395  		queryOutput := page.Data().(*dynamodb.QueryOutput)
   396  		return &dynamoDBReadResponse{
   397  			items: queryOutput.Items,
   398  		}, nil
   399  	}
   400  	return nil, fmt.Errorf("QueryPage error: %s for table %v, last error %v", backoff.Err(), *input.TableName, err)
   401  }
   402  
   403  type dynamoDBRequest interface {
   404  	NextPage() dynamoDBRequest
   405  	Send() error
   406  	Data() interface{}
   407  	Error() error
   408  	HasNextPage() bool
   409  	Retryable() bool
   410  }
   411  
   412  func (a dynamoDBStorageClient) queryRequest(ctx context.Context, input *dynamodb.QueryInput) dynamoDBRequest {
   413  	req, _ := a.DynamoDB.QueryRequest(input)
   414  	req.SetContext(ctx)
   415  	return dynamoDBRequestAdapter{req}
   416  }
   417  
   418  func (a dynamoDBStorageClient) batchGetItemRequest(ctx context.Context, input *dynamodb.BatchGetItemInput) dynamoDBRequest {
   419  	req, _ := a.DynamoDB.BatchGetItemRequest(input)
   420  	req.SetContext(ctx)
   421  	return dynamoDBRequestAdapter{req}
   422  }
   423  
   424  func (a dynamoDBStorageClient) batchWriteItemRequest(ctx context.Context, input *dynamodb.BatchWriteItemInput) dynamoDBRequest {
   425  	req, _ := a.DynamoDB.BatchWriteItemRequest(input)
   426  	req.SetContext(ctx)
   427  	return dynamoDBRequestAdapter{req}
   428  }
   429  
   430  type dynamoDBRequestAdapter struct {
   431  	request *request.Request
   432  }
   433  
   434  func (a dynamoDBRequestAdapter) NextPage() dynamoDBRequest {
   435  	next := a.request.NextPage()
   436  	if next == nil {
   437  		return nil
   438  	}
   439  	return dynamoDBRequestAdapter{next}
   440  }
   441  
   442  func (a dynamoDBRequestAdapter) Data() interface{} {
   443  	return a.request.Data
   444  }
   445  
   446  func (a dynamoDBRequestAdapter) Send() error {
   447  	// Clear error in case we are retrying the same operation - if we
   448  	// don't do this then the same error will come back again immediately
   449  	a.request.Error = nil
   450  	return a.request.Send()
   451  }
   452  
   453  func (a dynamoDBRequestAdapter) Error() error {
   454  	return a.request.Error
   455  }
   456  
   457  func (a dynamoDBRequestAdapter) HasNextPage() bool {
   458  	return a.request.HasNextPage()
   459  }
   460  
   461  func (a dynamoDBRequestAdapter) Retryable() bool {
   462  	return aws.BoolValue(a.request.Retryable)
   463  }
   464  
// chunksPlusError carries the outcome of one parallel sub-fetch in GetChunks
// over the results channel.
type chunksPlusError struct {
	chunks []chunk.Chunk // chunks returned by the sub-fetch
	err    error         // error returned by the sub-fetch, if any
}
   469  
   470  // GetChunks implements chunk.ObjectClient.
   471  func (a dynamoDBStorageClient) GetChunks(ctx context.Context, chunks []chunk.Chunk) ([]chunk.Chunk, error) {
   472  	log, ctx := spanlogger.New(ctx, "GetChunks.DynamoDB", ot.Tag{Key: "numChunks", Value: len(chunks)})
   473  	defer log.Span.Finish()
   474  	level.Debug(log).Log("chunks requested", len(chunks))
   475  
   476  	dynamoDBChunks := chunks
   477  	var err error
   478  
   479  	gangSize := a.cfg.ChunkGangSize * dynamoDBMaxReadBatchSize
   480  	if gangSize == 0 { // zero means turn feature off
   481  		gangSize = len(dynamoDBChunks)
   482  	} else {
   483  		if len(dynamoDBChunks)/gangSize > a.cfg.ChunkGetMaxParallelism {
   484  			gangSize = len(dynamoDBChunks)/a.cfg.ChunkGetMaxParallelism + 1
   485  		}
   486  	}
   487  
   488  	results := make(chan chunksPlusError)
   489  	for i := 0; i < len(dynamoDBChunks); i += gangSize {
   490  		go func(start int) {
   491  			end := start + gangSize
   492  			if end > len(dynamoDBChunks) {
   493  				end = len(dynamoDBChunks)
   494  			}
   495  			outChunks, err := a.getDynamoDBChunks(ctx, dynamoDBChunks[start:end])
   496  			results <- chunksPlusError{outChunks, err}
   497  		}(i)
   498  	}
   499  	finalChunks := []chunk.Chunk{}
   500  	for i := 0; i < len(dynamoDBChunks); i += gangSize {
   501  		in := <-results
   502  		if in.err != nil {
   503  			err = in.err // TODO: cancel other sub-queries at this point
   504  		}
   505  		finalChunks = append(finalChunks, in.chunks...)
   506  	}
   507  	level.Debug(log).Log("chunks fetched", len(finalChunks))
   508  
   509  	// Return any chunks we did receive: a partial result may be useful
   510  	return finalChunks, log.Error(err)
   511  }
   512  
   513  // As we're re-using the DynamoDB schema from the index for the chunk tables,
   514  // we need to provide a non-null, non-empty value for the range value.
   515  var placeholder = []byte{'c'}
   516  
// Fetch a set of chunks from DynamoDB, handling retries and backoff.
// Structure is identical to BatchWrite(), but operating on different datatypes
// so cannot share implementation.  If you fix a bug here fix it there too.
//
// Each chunk is looked up by its external key in the chunk table chosen by the
// schema config for the chunk's From time. Requests are sent in batches of at
// most dynamoDBMaxReadBatchSize keys until every key has been processed or the
// backoff gives up. When keys remain unfetched, the chunks decoded so far are
// returned together with an error.
func (a dynamoDBStorageClient) getDynamoDBChunks(ctx context.Context, chunks []chunk.Chunk) ([]chunk.Chunk, error) {
	log, ctx := spanlogger.New(ctx, "getDynamoDBChunks", ot.Tag{Key: "numChunks", Value: len(chunks)})
	defer log.Span.Finish()
	// Build the full read request and remember each chunk by key so responses
	// can be matched back to the chunk they belong to.
	outstanding := dynamoDBReadRequest{}
	chunksByKey := map[string]chunk.Chunk{}
	for _, chunk := range chunks {
		key := chunk.ExternalKey()
		chunksByKey[key] = chunk
		tableName, err := a.schemaCfg.ChunkTableFor(chunk.From)
		if err != nil {
			return nil, log.Error(err)
		}
		outstanding.Add(tableName, key, placeholder)
	}

	result := []chunk.Chunk{}
	unprocessed := dynamoDBReadRequest{}
	backoff := util.NewBackoff(ctx, a.cfg.backoffConfig)

	for outstanding.Len()+unprocessed.Len() > 0 && backoff.Ongoing() {
		// Fill the batch from fresh keys first, then top it up with retries.
		requests := dynamoDBReadRequest{}
		requests.TakeReqs(outstanding, dynamoDBMaxReadBatchSize)
		requests.TakeReqs(unprocessed, dynamoDBMaxReadBatchSize)

		request := a.batchGetItemRequestFn(ctx, &dynamodb.BatchGetItemInput{
			RequestItems:           requests,
			ReturnConsumedCapacity: aws.String(dynamodb.ReturnConsumedCapacityTotal),
		})

		err := instrument.CollectedRequest(ctx, "DynamoDB.BatchGetItemPages", dynamoRequestDuration, instrument.ErrorCode, func(ctx context.Context) error {
			return request.Send()
		})
		response := request.Data().(*dynamodb.BatchGetItemOutput)

		for _, cc := range response.ConsumedCapacity {
			dynamoConsumedCapacity.WithLabelValues("DynamoDB.BatchGetItemPages", *cc.TableName).
				Add(float64(*cc.CapacityUnits))
		}

		if err != nil {
			for tableName := range requests {
				recordDynamoError(tableName, err, "DynamoDB.BatchGetItemPages")
			}

			// If we get provisionedThroughputExceededException, then no items were processed,
			// so back off and retry all.
			if awsErr, ok := err.(awserr.Error); ok && ((awsErr.Code() == dynamodb.ErrCodeProvisionedThroughputExceededException) || request.Retryable()) {
				unprocessed.TakeReqs(requests, -1)
				backoff.Wait()
				continue
			} else if ok && awsErr.Code() == validationException {
				// this read will never work, so the only option is to drop the offending request and continue.
				level.Warn(log).Log("msg", "Error while fetching data from Dynamo", "err", awsErr)
				level.Debug(log).Log("msg", "Dropped request details", "requests", requests)
				// recording the drop counter separately from recordDynamoError(), as the error code alone may not provide enough context
				// to determine if a request was dropped (or not)
				for tableName := range requests {
					dynamoDroppedRequests.WithLabelValues(tableName, validationException, "DynamoDB.BatchGetItemPages").Inc()
				}
				continue
			}

			// All other errors are critical.
			return nil, err
		}

		// Decode the returned items into their chunks.
		processedChunks, err := processChunkResponse(response, chunksByKey)
		if err != nil {
			return nil, log.Error(err)
		}
		result = append(result, processedChunks...)

		// If there are unprocessed items, retry those items.
		if unprocessedKeys := response.UnprocessedKeys; unprocessedKeys != nil && dynamoDBReadRequest(unprocessedKeys).Len() > 0 {
			unprocessed.TakeReqs(unprocessedKeys, -1)
		}

		// A successful round trip starts the backoff over.
		backoff.Reset()
	}

	if valuesLeft := outstanding.Len() + unprocessed.Len(); valuesLeft > 0 {
		// Return the chunks we did fetch, because partial results may be useful
		return result, log.Error(fmt.Errorf("failed to query chunks, %d values remaining: %s", valuesLeft, backoff.Err()))
	}
	return result, nil
}
   606  
   607  func processChunkResponse(response *dynamodb.BatchGetItemOutput, chunksByKey map[string]chunk.Chunk) ([]chunk.Chunk, error) {
   608  	result := []chunk.Chunk{}
   609  	decodeContext := chunk.NewDecodeContext()
   610  	for _, items := range response.Responses {
   611  		for _, item := range items {
   612  			key, ok := item[hashKey]
   613  			if !ok || key == nil || key.S == nil {
   614  				return nil, fmt.Errorf("Got response from DynamoDB with no hash key: %+v", item)
   615  			}
   616  
   617  			chunk, ok := chunksByKey[*key.S]
   618  			if !ok {
   619  				return nil, fmt.Errorf("Got response from DynamoDB with chunk I didn't ask for: %s", *key.S)
   620  			}
   621  
   622  			buf, ok := item[valueKey]
   623  			if !ok || buf == nil || buf.B == nil {
   624  				return nil, fmt.Errorf("Got response from DynamoDB with no value: %+v", item)
   625  			}
   626  
   627  			if err := chunk.Decode(decodeContext, buf.B); err != nil {
   628  				return nil, err
   629  			}
   630  
   631  			result = append(result, chunk)
   632  		}
   633  	}
   634  	return result, nil
   635  }
   636  
   637  // PutChunkAndIndex implements chunk.ObjectAndIndexClient
   638  // Combine both sets of writes before sending to DynamoDB, for performance
   639  func (a dynamoDBStorageClient) PutChunkAndIndex(ctx context.Context, c chunk.Chunk, index chunk.WriteBatch) error {
   640  	dynamoDBWrites, err := a.writesForChunks([]chunk.Chunk{c})
   641  	if err != nil {
   642  		return err
   643  	}
   644  	dynamoDBWrites.TakeReqs(index.(dynamoDBWriteBatch), 0)
   645  	return a.BatchWrite(ctx, dynamoDBWrites)
   646  }
   647  
   648  // PutChunks implements chunk.ObjectClient.
   649  func (a dynamoDBStorageClient) PutChunks(ctx context.Context, chunks []chunk.Chunk) error {
   650  	dynamoDBWrites, err := a.writesForChunks(chunks)
   651  	if err != nil {
   652  		return err
   653  	}
   654  	return a.BatchWrite(ctx, dynamoDBWrites)
   655  }
   656  
   657  func (a dynamoDBStorageClient) writesForChunks(chunks []chunk.Chunk) (dynamoDBWriteBatch, error) {
   658  	var (
   659  		dynamoDBWrites = dynamoDBWriteBatch{}
   660  	)
   661  
   662  	for i := range chunks {
   663  		buf, err := chunks[i].Encoded()
   664  		if err != nil {
   665  			return nil, err
   666  		}
   667  		key := chunks[i].ExternalKey()
   668  
   669  		table, err := a.schemaCfg.ChunkTableFor(chunks[i].From)
   670  		if err != nil {
   671  			return nil, err
   672  		}
   673  
   674  		dynamoDBWrites.Add(table, key, placeholder, buf)
   675  	}
   676  
   677  	return dynamoDBWrites, nil
   678  }
   679  
   680  // Slice of values returned; map key is attribute name
   681  type dynamoDBReadResponse struct {
   682  	items []map[string]*dynamodb.AttributeValue
   683  }
   684  
   685  func (b *dynamoDBReadResponse) Iterator() chunk.ReadBatchIterator {
   686  	return &dynamoDBReadResponseIterator{
   687  		i:                    -1,
   688  		dynamoDBReadResponse: b,
   689  	}
   690  }
   691  
   692  type dynamoDBReadResponseIterator struct {
   693  	i int
   694  	*dynamoDBReadResponse
   695  }
   696  
   697  func (b *dynamoDBReadResponseIterator) Next() bool {
   698  	b.i++
   699  	return b.i < len(b.items)
   700  }
   701  
   702  func (b *dynamoDBReadResponseIterator) RangeValue() []byte {
   703  	return b.items[b.i][rangeKey].B
   704  }
   705  
   706  func (b *dynamoDBReadResponseIterator) Value() []byte {
   707  	chunkValue, ok := b.items[b.i][valueKey]
   708  	if !ok {
   709  		return nil
   710  	}
   711  	return chunkValue.B
   712  }
   713  
   714  // map key is table name; value is a slice of things to 'put'
   715  type dynamoDBWriteBatch map[string][]*dynamodb.WriteRequest
   716  
   717  func (b dynamoDBWriteBatch) Len() int {
   718  	result := 0
   719  	for _, reqs := range b {
   720  		result += len(reqs)
   721  	}
   722  	return result
   723  }
   724  
   725  func (b dynamoDBWriteBatch) String() string {
   726  	var sb strings.Builder
   727  	sb.WriteByte('{')
   728  	for k, reqs := range b {
   729  		sb.WriteString(k)
   730  		sb.WriteString(": [")
   731  		for _, req := range reqs {
   732  			sb.WriteString(req.String())
   733  			sb.WriteByte(',')
   734  		}
   735  		sb.WriteString("], ")
   736  	}
   737  	sb.WriteByte('}')
   738  	return sb.String()
   739  }
   740  
   741  func (b dynamoDBWriteBatch) Add(tableName, hashValue string, rangeValue []byte, value []byte) {
   742  	item := map[string]*dynamodb.AttributeValue{
   743  		hashKey:  {S: aws.String(hashValue)},
   744  		rangeKey: {B: rangeValue},
   745  	}
   746  
   747  	if value != nil {
   748  		item[valueKey] = &dynamodb.AttributeValue{B: value}
   749  	}
   750  
   751  	b[tableName] = append(b[tableName], &dynamodb.WriteRequest{
   752  		PutRequest: &dynamodb.PutRequest{
   753  			Item: item,
   754  		},
   755  	})
   756  }
   757  
   758  // Fill 'b' with WriteRequests from 'from' until 'b' has at most max requests. Remove those requests from 'from'.
   759  func (b dynamoDBWriteBatch) TakeReqs(from dynamoDBWriteBatch, max int) {
   760  	outLen, inLen := b.Len(), from.Len()
   761  	toFill := inLen
   762  	if max > 0 {
   763  		toFill = util.Min(inLen, max-outLen)
   764  	}
   765  	for toFill > 0 {
   766  		for tableName, fromReqs := range from {
   767  			taken := util.Min(len(fromReqs), toFill)
   768  			if taken > 0 {
   769  				b[tableName] = append(b[tableName], fromReqs[:taken]...)
   770  				from[tableName] = fromReqs[taken:]
   771  				toFill -= taken
   772  			}
   773  		}
   774  	}
   775  }
   776  
   777  // map key is table name
   778  type dynamoDBReadRequest map[string]*dynamodb.KeysAndAttributes
   779  
   780  func (b dynamoDBReadRequest) Len() int {
   781  	result := 0
   782  	for _, reqs := range b {
   783  		result += len(reqs.Keys)
   784  	}
   785  	return result
   786  }
   787  
   788  func (b dynamoDBReadRequest) Add(tableName, hashValue string, rangeValue []byte) {
   789  	requests, ok := b[tableName]
   790  	if !ok {
   791  		requests = &dynamodb.KeysAndAttributes{
   792  			AttributesToGet: []*string{
   793  				aws.String(hashKey),
   794  				aws.String(valueKey),
   795  			},
   796  		}
   797  		b[tableName] = requests
   798  	}
   799  	requests.Keys = append(requests.Keys, map[string]*dynamodb.AttributeValue{
   800  		hashKey:  {S: aws.String(hashValue)},
   801  		rangeKey: {B: rangeValue},
   802  	})
   803  }
   804  
   805  // Fill 'b' with ReadRequests from 'from' until 'b' has at most max requests. Remove those requests from 'from'.
   806  func (b dynamoDBReadRequest) TakeReqs(from dynamoDBReadRequest, max int) {
   807  	outLen, inLen := b.Len(), from.Len()
   808  	toFill := inLen
   809  	if max > 0 {
   810  		toFill = util.Min(inLen, max-outLen)
   811  	}
   812  	for toFill > 0 {
   813  		for tableName, fromReqs := range from {
   814  			taken := util.Min(len(fromReqs.Keys), toFill)
   815  			if taken > 0 {
   816  				if _, ok := b[tableName]; !ok {
   817  					b[tableName] = &dynamodb.KeysAndAttributes{
   818  						AttributesToGet: []*string{
   819  							aws.String(hashKey),
   820  							aws.String(valueKey),
   821  						},
   822  					}
   823  				}
   824  
   825  				b[tableName].Keys = append(b[tableName].Keys, fromReqs.Keys[:taken]...)
   826  				from[tableName].Keys = fromReqs.Keys[taken:]
   827  				toFill -= taken
   828  			}
   829  		}
   830  	}
   831  }
   832  
   833  func recordDynamoError(tableName string, err error, operation string) {
   834  	if awsErr, ok := err.(awserr.Error); ok {
   835  		dynamoFailures.WithLabelValues(tableName, awsErr.Code(), operation).Add(float64(1))
   836  	} else {
   837  		dynamoFailures.WithLabelValues(tableName, otherError, operation).Add(float64(1))
   838  	}
   839  }
   840  
   841  // dynamoClientFromURL creates a new DynamoDB client from a URL.
   842  func dynamoClientFromURL(awsURL *url.URL) (dynamodbiface.DynamoDBAPI, error) {
   843  	dynamoDBSession, err := awsSessionFromURL(awsURL)
   844  	if err != nil {
   845  		return nil, err
   846  	}
   847  	return dynamodb.New(dynamoDBSession), nil
   848  }
   849  
   850  // awsSessionFromURL creates a new aws session from a URL.
   851  func awsSessionFromURL(awsURL *url.URL) (client.ConfigProvider, error) {
   852  	if awsURL == nil {
   853  		return nil, fmt.Errorf("no URL specified for DynamoDB")
   854  	}
   855  	path := strings.TrimPrefix(awsURL.Path, "/")
   856  	if len(path) > 0 {
   857  		level.Warn(util.Logger).Log("msg", "ignoring DynamoDB URL path", "path", path)
   858  	}
   859  	config, err := awscommon.ConfigFromURL(awsURL)
   860  	if err != nil {
   861  		return nil, err
   862  	}
   863  	config = config.WithMaxRetries(0) // We do our own retries, so we can monitor them
   864  	return session.New(config), nil
   865  }