github.com/sequix/cortex@v1.1.6/pkg/chunk/aws/dynamodb_storage_client.go (about) 1 package aws 2 3 import ( 4 "context" 5 "flag" 6 "fmt" 7 "net/url" 8 "strings" 9 "time" 10 11 "github.com/go-kit/kit/log/level" 12 ot "github.com/opentracing/opentracing-go" 13 "golang.org/x/time/rate" 14 15 "github.com/aws/aws-sdk-go/aws" 16 "github.com/aws/aws-sdk-go/aws/awserr" 17 "github.com/aws/aws-sdk-go/aws/client" 18 "github.com/aws/aws-sdk-go/aws/request" 19 "github.com/aws/aws-sdk-go/aws/session" 20 "github.com/aws/aws-sdk-go/service/dynamodb" 21 "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface" 22 "github.com/prometheus/client_golang/prometheus" 23 24 "github.com/sequix/cortex/pkg/chunk" 25 chunk_util "github.com/sequix/cortex/pkg/chunk/util" 26 "github.com/sequix/cortex/pkg/util" 27 "github.com/sequix/cortex/pkg/util/flagext" 28 "github.com/sequix/cortex/pkg/util/spanlogger" 29 awscommon "github.com/weaveworks/common/aws" 30 "github.com/weaveworks/common/instrument" 31 "github.com/weaveworks/common/user" 32 ) 33 34 const ( 35 hashKey = "h" 36 rangeKey = "r" 37 valueKey = "c" 38 39 // For dynamodb errors 40 tableNameLabel = "table" 41 errorReasonLabel = "error" 42 otherError = "other" 43 44 // See http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html. 45 dynamoDBMaxWriteBatchSize = 25 46 dynamoDBMaxReadBatchSize = 100 47 validationException = "ValidationException" 48 ) 49 50 var ( 51 dynamoRequestDuration = instrument.NewHistogramCollector(prometheus.NewHistogramVec(prometheus.HistogramOpts{ 52 Namespace: "cortex", 53 Name: "dynamo_request_duration_seconds", 54 Help: "Time spent doing DynamoDB requests.", 55 56 // DynamoDB latency seems to range from a few ms to a few sec and is 57 // important. So use 8 buckets from 128us to 2s. 58 Buckets: prometheus.ExponentialBuckets(0.000128, 4, 8), 59 }, []string{"operation", "status_code"})) 60 dynamoConsumedCapacity = prometheus.NewCounterVec(prometheus.CounterOpts{ 61 Namespace: "cortex", 62 Name: "dynamo_consumed_capacity_total", 63 Help: "The capacity units consumed by operation.", 64 }, []string{"operation", tableNameLabel}) 65 dynamoThrottled = prometheus.NewCounterVec(prometheus.CounterOpts{ 66 Namespace: "cortex", 67 Name: "dynamo_throttled_total", 68 Help: "The total number of throttled events.", 69 }, []string{"operation", tableNameLabel}) 70 dynamoFailures = prometheus.NewCounterVec(prometheus.CounterOpts{ 71 Namespace: "cortex", 72 Name: "dynamo_failures_total", 73 Help: "The total number of errors while storing chunks to the chunk store.", 74 }, []string{tableNameLabel, errorReasonLabel, "operation"}) 75 dynamoDroppedRequests = prometheus.NewCounterVec(prometheus.CounterOpts{ 76 Namespace: "cortex", 77 Name: "dynamo_dropped_requests_total", 78 Help: "The total number of requests which were dropped due to errors encountered from dynamo.", 79 }, []string{tableNameLabel, errorReasonLabel, "operation"}) 80 dynamoQueryPagesCount = prometheus.NewHistogram(prometheus.HistogramOpts{ 81 Namespace: "cortex", 82 Name: "dynamo_query_pages_count", 83 Help: "Number of pages per query.", 84 // Most queries will have one page, however this may increase with fuzzy 85 // metric names. 86 Buckets: prometheus.ExponentialBuckets(1, 4, 6), 87 }) 88 ) 89 90 func init() { 91 dynamoRequestDuration.Register() 92 prometheus.MustRegister(dynamoConsumedCapacity) 93 prometheus.MustRegister(dynamoThrottled) 94 prometheus.MustRegister(dynamoFailures) 95 prometheus.MustRegister(dynamoQueryPagesCount) 96 prometheus.MustRegister(dynamoDroppedRequests) 97 } 98 99 // DynamoDBConfig specifies config for a DynamoDB database. 100 type DynamoDBConfig struct { 101 DynamoDB flagext.URLValue 102 APILimit float64 103 ThrottleLimit float64 104 ApplicationAutoScaling flagext.URLValue 105 Metrics MetricsAutoScalingConfig 106 ChunkGangSize int 107 ChunkGetMaxParallelism int 108 backoffConfig util.BackoffConfig 109 } 110 111 // RegisterFlags adds the flags required to config this to the given FlagSet 112 func (cfg *DynamoDBConfig) RegisterFlags(f *flag.FlagSet) { 113 f.Var(&cfg.DynamoDB, "dynamodb.url", "DynamoDB endpoint URL with escaped Key and Secret encoded. "+ 114 "If only region is specified as a host, proper endpoint will be deduced. Use inmemory:///<table-name> to use a mock in-memory implementation.") 115 f.Float64Var(&cfg.APILimit, "dynamodb.api-limit", 2.0, "DynamoDB table management requests per second limit.") 116 f.Float64Var(&cfg.ThrottleLimit, "dynamodb.throttle-limit", 10.0, "DynamoDB rate cap to back off when throttled.") 117 f.Var(&cfg.ApplicationAutoScaling, "applicationautoscaling.url", "ApplicationAutoscaling endpoint URL with escaped Key and Secret encoded.") 118 f.IntVar(&cfg.ChunkGangSize, "dynamodb.chunk.gang.size", 10, "Number of chunks to group together to parallelise fetches (zero to disable)") 119 f.IntVar(&cfg.ChunkGetMaxParallelism, "dynamodb.chunk.get.max.parallelism", 32, "Max number of chunk-get operations to start in parallel") 120 f.DurationVar(&cfg.backoffConfig.MinBackoff, "dynamodb.min-backoff", 100*time.Millisecond, "Minimum backoff time") 121 f.DurationVar(&cfg.backoffConfig.MaxBackoff, "dynamodb.max-backoff", 50*time.Second, "Maximum backoff time") 122 f.IntVar(&cfg.backoffConfig.MaxRetries, "dynamodb.max-retries", 20, "Maximum number of times to retry an operation") 123 cfg.Metrics.RegisterFlags(f) 124 } 125 126 // StorageConfig specifies config for storing data on AWS. 127 type StorageConfig struct { 128 DynamoDBConfig 129 S3 flagext.URLValue 130 S3ForcePathStyle bool 131 } 132 133 // RegisterFlags adds the flags required to config this to the given FlagSet 134 func (cfg *StorageConfig) RegisterFlags(f *flag.FlagSet) { 135 cfg.DynamoDBConfig.RegisterFlags(f) 136 137 f.Var(&cfg.S3, "s3.url", "S3 endpoint URL with escaped Key and Secret encoded. "+ 138 "If only region is specified as a host, proper endpoint will be deduced. Use inmemory:///<bucket-name> to use a mock in-memory implementation.") 139 f.BoolVar(&cfg.S3ForcePathStyle, "s3.force-path-style", false, "Set this to `true` to force the request to use path-style addressing.") 140 } 141 142 type dynamoDBStorageClient struct { 143 cfg DynamoDBConfig 144 schemaCfg chunk.SchemaConfig 145 146 DynamoDB dynamodbiface.DynamoDBAPI 147 // These rate-limiters let us slow down when DynamoDB signals provision limits. 148 writeThrottle *rate.Limiter 149 150 // These functions exists for mocking, so we don't have to write a whole load 151 // of boilerplate. 152 queryRequestFn func(ctx context.Context, input *dynamodb.QueryInput) dynamoDBRequest 153 batchGetItemRequestFn func(ctx context.Context, input *dynamodb.BatchGetItemInput) dynamoDBRequest 154 batchWriteItemRequestFn func(ctx context.Context, input *dynamodb.BatchWriteItemInput) dynamoDBRequest 155 } 156 157 // NewDynamoDBIndexClient makes a new DynamoDB-backed IndexClient. 158 func NewDynamoDBIndexClient(cfg DynamoDBConfig, schemaCfg chunk.SchemaConfig) (chunk.IndexClient, error) { 159 return newDynamoDBStorageClient(cfg, schemaCfg) 160 } 161 162 // NewDynamoDBObjectClient makes a new DynamoDB-backed ObjectClient. 163 func NewDynamoDBObjectClient(cfg DynamoDBConfig, schemaCfg chunk.SchemaConfig) (chunk.ObjectClient, error) { 164 return newDynamoDBStorageClient(cfg, schemaCfg) 165 } 166 167 // newDynamoDBStorageClient makes a new DynamoDB-backed IndexClient and ObjectClient. 168 func newDynamoDBStorageClient(cfg DynamoDBConfig, schemaCfg chunk.SchemaConfig) (*dynamoDBStorageClient, error) { 169 dynamoDB, err := dynamoClientFromURL(cfg.DynamoDB.URL) 170 if err != nil { 171 return nil, err 172 } 173 174 client := &dynamoDBStorageClient{ 175 cfg: cfg, 176 schemaCfg: schemaCfg, 177 DynamoDB: dynamoDB, 178 writeThrottle: rate.NewLimiter(rate.Limit(cfg.ThrottleLimit), dynamoDBMaxWriteBatchSize), 179 } 180 client.queryRequestFn = client.queryRequest 181 client.batchGetItemRequestFn = client.batchGetItemRequest 182 client.batchWriteItemRequestFn = client.batchWriteItemRequest 183 return client, nil 184 } 185 186 // Stop implements chunk.IndexClient. 187 func (a dynamoDBStorageClient) Stop() { 188 } 189 190 // NewWriteBatch implements chunk.IndexClient. 191 func (a dynamoDBStorageClient) NewWriteBatch() chunk.WriteBatch { 192 return dynamoDBWriteBatch(map[string][]*dynamodb.WriteRequest{}) 193 } 194 195 func logWriteRetry(ctx context.Context, unprocessed dynamoDBWriteBatch) { 196 userID, _ := user.ExtractOrgID(ctx) 197 for table, reqs := range unprocessed { 198 dynamoThrottled.WithLabelValues("DynamoDB.BatchWriteItem", table).Add(float64(len(reqs))) 199 for _, req := range reqs { 200 item := req.PutRequest.Item 201 var hash, rnge string 202 if hashAttr, ok := item[hashKey]; ok { 203 if hashAttr.S != nil { 204 hash = *hashAttr.S 205 } 206 } 207 if rangeAttr, ok := item[rangeKey]; ok { 208 rnge = string(rangeAttr.B) 209 } 210 util.Event().Log("msg", "store retry", "table", table, "userID", userID, "hashKey", hash, "rangeKey", rnge) 211 } 212 } 213 } 214 215 // BatchWrite writes requests to the underlying storage, handling retries and backoff. 216 // Structure is identical to getDynamoDBChunks(), but operating on different datatypes 217 // so cannot share implementation. If you fix a bug here fix it there too. 218 func (a dynamoDBStorageClient) BatchWrite(ctx context.Context, input chunk.WriteBatch) error { 219 outstanding := input.(dynamoDBWriteBatch) 220 unprocessed := dynamoDBWriteBatch{} 221 222 backoff := util.NewBackoff(ctx, a.cfg.backoffConfig) 223 224 for outstanding.Len()+unprocessed.Len() > 0 && backoff.Ongoing() { 225 requests := dynamoDBWriteBatch{} 226 requests.TakeReqs(outstanding, dynamoDBMaxWriteBatchSize) 227 requests.TakeReqs(unprocessed, dynamoDBMaxWriteBatchSize) 228 229 request := a.batchWriteItemRequestFn(ctx, &dynamodb.BatchWriteItemInput{ 230 RequestItems: requests, 231 ReturnConsumedCapacity: aws.String(dynamodb.ReturnConsumedCapacityTotal), 232 }) 233 234 err := instrument.CollectedRequest(ctx, "DynamoDB.BatchWriteItem", dynamoRequestDuration, instrument.ErrorCode, func(ctx context.Context) error { 235 return request.Send() 236 }) 237 resp := request.Data().(*dynamodb.BatchWriteItemOutput) 238 239 for _, cc := range resp.ConsumedCapacity { 240 dynamoConsumedCapacity.WithLabelValues("DynamoDB.BatchWriteItem", *cc.TableName). 241 Add(float64(*cc.CapacityUnits)) 242 } 243 244 if err != nil { 245 for tableName := range requests { 246 recordDynamoError(tableName, err, "DynamoDB.BatchWriteItem") 247 } 248 249 // If we get provisionedThroughputExceededException, then no items were processed, 250 // so back off and retry all. 251 if awsErr, ok := err.(awserr.Error); ok && ((awsErr.Code() == dynamodb.ErrCodeProvisionedThroughputExceededException) || request.Retryable()) { 252 logWriteRetry(ctx, requests) 253 unprocessed.TakeReqs(requests, -1) 254 a.writeThrottle.WaitN(ctx, len(requests)) 255 backoff.Wait() 256 continue 257 } else if ok && awsErr.Code() == validationException { 258 // this write will never work, so the only option is to drop the offending items and continue. 259 level.Warn(util.Logger).Log("msg", "Data lost while flushing to Dynamo", "err", awsErr) 260 level.Debug(util.Logger).Log("msg", "Dropped request details", "requests", requests) 261 util.Event().Log("msg", "ValidationException", "requests", requests) 262 // recording the drop counter separately from recordDynamoError(), as the error code alone may not provide enough context 263 // to determine if a request was dropped (or not) 264 for tableName := range requests { 265 dynamoDroppedRequests.WithLabelValues(tableName, validationException, "DynamoDB.BatchWriteItem").Inc() 266 } 267 continue 268 } 269 270 // All other errors are critical. 271 return err 272 } 273 274 // If there are unprocessed items, retry those items. 275 unprocessedItems := dynamoDBWriteBatch(resp.UnprocessedItems) 276 if len(unprocessedItems) > 0 { 277 logWriteRetry(ctx, unprocessedItems) 278 a.writeThrottle.WaitN(ctx, unprocessedItems.Len()) 279 unprocessed.TakeReqs(unprocessedItems, -1) 280 } 281 282 backoff.Reset() 283 } 284 285 if valuesLeft := outstanding.Len() + unprocessed.Len(); valuesLeft > 0 { 286 return fmt.Errorf("failed to write chunk, %d values remaining: %s", valuesLeft, backoff.Err()) 287 } 288 return backoff.Err() 289 } 290 291 // QueryPages implements chunk.IndexClient. 292 func (a dynamoDBStorageClient) QueryPages(ctx context.Context, queries []chunk.IndexQuery, callback func(chunk.IndexQuery, chunk.ReadBatch) bool) error { 293 return chunk_util.DoParallelQueries(ctx, a.query, queries, callback) 294 } 295 296 func (a dynamoDBStorageClient) query(ctx context.Context, query chunk.IndexQuery, callback func(result chunk.ReadBatch) (shouldContinue bool)) error { 297 input := &dynamodb.QueryInput{ 298 TableName: aws.String(query.TableName), 299 KeyConditions: map[string]*dynamodb.Condition{ 300 hashKey: { 301 AttributeValueList: []*dynamodb.AttributeValue{ 302 {S: aws.String(query.HashValue)}, 303 }, 304 ComparisonOperator: aws.String(dynamodb.ComparisonOperatorEq), 305 }, 306 }, 307 ReturnConsumedCapacity: aws.String(dynamodb.ReturnConsumedCapacityTotal), 308 } 309 310 if query.RangeValuePrefix != nil { 311 input.KeyConditions[rangeKey] = &dynamodb.Condition{ 312 AttributeValueList: []*dynamodb.AttributeValue{ 313 {B: query.RangeValuePrefix}, 314 }, 315 ComparisonOperator: aws.String(dynamodb.ComparisonOperatorBeginsWith), 316 } 317 } else if query.RangeValueStart != nil { 318 input.KeyConditions[rangeKey] = &dynamodb.Condition{ 319 AttributeValueList: []*dynamodb.AttributeValue{ 320 {B: query.RangeValueStart}, 321 }, 322 ComparisonOperator: aws.String(dynamodb.ComparisonOperatorGe), 323 } 324 } 325 326 // Filters 327 if query.ValueEqual != nil { 328 input.FilterExpression = aws.String(fmt.Sprintf("%s = :v", valueKey)) 329 input.ExpressionAttributeValues = map[string]*dynamodb.AttributeValue{ 330 ":v": { 331 B: query.ValueEqual, 332 }, 333 } 334 } 335 336 request := a.queryRequestFn(ctx, input) 337 pageCount := 0 338 defer func() { 339 dynamoQueryPagesCount.Observe(float64(pageCount)) 340 }() 341 342 for page := request; page != nil; page = page.NextPage() { 343 pageCount++ 344 345 response, err := a.queryPage(ctx, input, page, query.HashValue, pageCount) 346 if err != nil { 347 return err 348 } 349 350 if !callback(response) { 351 if err != nil { 352 return fmt.Errorf("QueryPages error: table=%v, err=%v", *input.TableName, page.Error()) 353 } 354 return nil 355 } 356 if !page.HasNextPage() { 357 return nil 358 } 359 } 360 return nil 361 } 362 363 func (a dynamoDBStorageClient) queryPage(ctx context.Context, input *dynamodb.QueryInput, page dynamoDBRequest, hashValue string, pageCount int) (*dynamoDBReadResponse, error) { 364 backoff := util.NewBackoff(ctx, a.cfg.backoffConfig) 365 366 var err error 367 for backoff.Ongoing() { 368 err = instrument.CollectedRequest(ctx, "DynamoDB.QueryPages", dynamoRequestDuration, instrument.ErrorCode, func(innerCtx context.Context) error { 369 if sp := ot.SpanFromContext(innerCtx); sp != nil { 370 sp.SetTag("tableName", aws.StringValue(input.TableName)) 371 sp.SetTag("hashValue", hashValue) 372 sp.SetTag("page", pageCount) 373 sp.SetTag("retry", backoff.NumRetries()) 374 } 375 return page.Send() 376 }) 377 378 if cc := page.Data().(*dynamodb.QueryOutput).ConsumedCapacity; cc != nil { 379 dynamoConsumedCapacity.WithLabelValues("DynamoDB.QueryPages", *cc.TableName). 380 Add(float64(*cc.CapacityUnits)) 381 } 382 383 if err != nil { 384 recordDynamoError(*input.TableName, err, "DynamoDB.QueryPages") 385 if awsErr, ok := err.(awserr.Error); ok && ((awsErr.Code() == dynamodb.ErrCodeProvisionedThroughputExceededException) || page.Retryable()) { 386 if awsErr.Code() != dynamodb.ErrCodeProvisionedThroughputExceededException { 387 level.Warn(util.Logger).Log("msg", "DynamoDB error", "retry", backoff.NumRetries(), "table", *input.TableName, "err", err) 388 } 389 backoff.Wait() 390 continue 391 } 392 return nil, fmt.Errorf("QueryPage error: table=%v, err=%v", *input.TableName, err) 393 } 394 395 queryOutput := page.Data().(*dynamodb.QueryOutput) 396 return &dynamoDBReadResponse{ 397 items: queryOutput.Items, 398 }, nil 399 } 400 return nil, fmt.Errorf("QueryPage error: %s for table %v, last error %v", backoff.Err(), *input.TableName, err) 401 } 402 403 type dynamoDBRequest interface { 404 NextPage() dynamoDBRequest 405 Send() error 406 Data() interface{} 407 Error() error 408 HasNextPage() bool 409 Retryable() bool 410 } 411 412 func (a dynamoDBStorageClient) queryRequest(ctx context.Context, input *dynamodb.QueryInput) dynamoDBRequest { 413 req, _ := a.DynamoDB.QueryRequest(input) 414 req.SetContext(ctx) 415 return dynamoDBRequestAdapter{req} 416 } 417 418 func (a dynamoDBStorageClient) batchGetItemRequest(ctx context.Context, input *dynamodb.BatchGetItemInput) dynamoDBRequest { 419 req, _ := a.DynamoDB.BatchGetItemRequest(input) 420 req.SetContext(ctx) 421 return dynamoDBRequestAdapter{req} 422 } 423 424 func (a dynamoDBStorageClient) batchWriteItemRequest(ctx context.Context, input *dynamodb.BatchWriteItemInput) dynamoDBRequest { 425 req, _ := a.DynamoDB.BatchWriteItemRequest(input) 426 req.SetContext(ctx) 427 return dynamoDBRequestAdapter{req} 428 } 429 430 type dynamoDBRequestAdapter struct { 431 request *request.Request 432 } 433 434 func (a dynamoDBRequestAdapter) NextPage() dynamoDBRequest { 435 next := a.request.NextPage() 436 if next == nil { 437 return nil 438 } 439 return dynamoDBRequestAdapter{next} 440 } 441 442 func (a dynamoDBRequestAdapter) Data() interface{} { 443 return a.request.Data 444 } 445 446 func (a dynamoDBRequestAdapter) Send() error { 447 // Clear error in case we are retrying the same operation - if we 448 // don't do this then the same error will come back again immediately 449 a.request.Error = nil 450 return a.request.Send() 451 } 452 453 func (a dynamoDBRequestAdapter) Error() error { 454 return a.request.Error 455 } 456 457 func (a dynamoDBRequestAdapter) HasNextPage() bool { 458 return a.request.HasNextPage() 459 } 460 461 func (a dynamoDBRequestAdapter) Retryable() bool { 462 return aws.BoolValue(a.request.Retryable) 463 } 464 465 type chunksPlusError struct { 466 chunks []chunk.Chunk 467 err error 468 } 469 470 // GetChunks implements chunk.ObjectClient. 471 func (a dynamoDBStorageClient) GetChunks(ctx context.Context, chunks []chunk.Chunk) ([]chunk.Chunk, error) { 472 log, ctx := spanlogger.New(ctx, "GetChunks.DynamoDB", ot.Tag{Key: "numChunks", Value: len(chunks)}) 473 defer log.Span.Finish() 474 level.Debug(log).Log("chunks requested", len(chunks)) 475 476 dynamoDBChunks := chunks 477 var err error 478 479 gangSize := a.cfg.ChunkGangSize * dynamoDBMaxReadBatchSize 480 if gangSize == 0 { // zero means turn feature off 481 gangSize = len(dynamoDBChunks) 482 } else { 483 if len(dynamoDBChunks)/gangSize > a.cfg.ChunkGetMaxParallelism { 484 gangSize = len(dynamoDBChunks)/a.cfg.ChunkGetMaxParallelism + 1 485 } 486 } 487 488 results := make(chan chunksPlusError) 489 for i := 0; i < len(dynamoDBChunks); i += gangSize { 490 go func(start int) { 491 end := start + gangSize 492 if end > len(dynamoDBChunks) { 493 end = len(dynamoDBChunks) 494 } 495 outChunks, err := a.getDynamoDBChunks(ctx, dynamoDBChunks[start:end]) 496 results <- chunksPlusError{outChunks, err} 497 }(i) 498 } 499 finalChunks := []chunk.Chunk{} 500 for i := 0; i < len(dynamoDBChunks); i += gangSize { 501 in := <-results 502 if in.err != nil { 503 err = in.err // TODO: cancel other sub-queries at this point 504 } 505 finalChunks = append(finalChunks, in.chunks...) 506 } 507 level.Debug(log).Log("chunks fetched", len(finalChunks)) 508 509 // Return any chunks we did receive: a partial result may be useful 510 return finalChunks, log.Error(err) 511 } 512 513 // As we're re-using the DynamoDB schema from the index for the chunk tables, 514 // we need to provide a non-null, non-empty value for the range value. 515 var placeholder = []byte{'c'} 516 517 // Fetch a set of chunks from DynamoDB, handling retries and backoff. 518 // Structure is identical to BatchWrite(), but operating on different datatypes 519 // so cannot share implementation. If you fix a bug here fix it there too. 520 func (a dynamoDBStorageClient) getDynamoDBChunks(ctx context.Context, chunks []chunk.Chunk) ([]chunk.Chunk, error) { 521 log, ctx := spanlogger.New(ctx, "getDynamoDBChunks", ot.Tag{Key: "numChunks", Value: len(chunks)}) 522 defer log.Span.Finish() 523 outstanding := dynamoDBReadRequest{} 524 chunksByKey := map[string]chunk.Chunk{} 525 for _, chunk := range chunks { 526 key := chunk.ExternalKey() 527 chunksByKey[key] = chunk 528 tableName, err := a.schemaCfg.ChunkTableFor(chunk.From) 529 if err != nil { 530 return nil, log.Error(err) 531 } 532 outstanding.Add(tableName, key, placeholder) 533 } 534 535 result := []chunk.Chunk{} 536 unprocessed := dynamoDBReadRequest{} 537 backoff := util.NewBackoff(ctx, a.cfg.backoffConfig) 538 539 for outstanding.Len()+unprocessed.Len() > 0 && backoff.Ongoing() { 540 requests := dynamoDBReadRequest{} 541 requests.TakeReqs(outstanding, dynamoDBMaxReadBatchSize) 542 requests.TakeReqs(unprocessed, dynamoDBMaxReadBatchSize) 543 544 request := a.batchGetItemRequestFn(ctx, &dynamodb.BatchGetItemInput{ 545 RequestItems: requests, 546 ReturnConsumedCapacity: aws.String(dynamodb.ReturnConsumedCapacityTotal), 547 }) 548 549 err := instrument.CollectedRequest(ctx, "DynamoDB.BatchGetItemPages", dynamoRequestDuration, instrument.ErrorCode, func(ctx context.Context) error { 550 return request.Send() 551 }) 552 response := request.Data().(*dynamodb.BatchGetItemOutput) 553 554 for _, cc := range response.ConsumedCapacity { 555 dynamoConsumedCapacity.WithLabelValues("DynamoDB.BatchGetItemPages", *cc.TableName). 556 Add(float64(*cc.CapacityUnits)) 557 } 558 559 if err != nil { 560 for tableName := range requests { 561 recordDynamoError(tableName, err, "DynamoDB.BatchGetItemPages") 562 } 563 564 // If we get provisionedThroughputExceededException, then no items were processed, 565 // so back off and retry all. 566 if awsErr, ok := err.(awserr.Error); ok && ((awsErr.Code() == dynamodb.ErrCodeProvisionedThroughputExceededException) || request.Retryable()) { 567 unprocessed.TakeReqs(requests, -1) 568 backoff.Wait() 569 continue 570 } else if ok && awsErr.Code() == validationException { 571 // this read will never work, so the only option is to drop the offending request and continue. 572 level.Warn(log).Log("msg", "Error while fetching data from Dynamo", "err", awsErr) 573 level.Debug(log).Log("msg", "Dropped request details", "requests", requests) 574 // recording the drop counter separately from recordDynamoError(), as the error code alone may not provide enough context 575 // to determine if a request was dropped (or not) 576 for tableName := range requests { 577 dynamoDroppedRequests.WithLabelValues(tableName, validationException, "DynamoDB.BatchGetItemPages").Inc() 578 } 579 continue 580 } 581 582 // All other errors are critical. 583 return nil, err 584 } 585 586 processedChunks, err := processChunkResponse(response, chunksByKey) 587 if err != nil { 588 return nil, log.Error(err) 589 } 590 result = append(result, processedChunks...) 591 592 // If there are unprocessed items, retry those items. 593 if unprocessedKeys := response.UnprocessedKeys; unprocessedKeys != nil && dynamoDBReadRequest(unprocessedKeys).Len() > 0 { 594 unprocessed.TakeReqs(unprocessedKeys, -1) 595 } 596 597 backoff.Reset() 598 } 599 600 if valuesLeft := outstanding.Len() + unprocessed.Len(); valuesLeft > 0 { 601 // Return the chunks we did fetch, because partial results may be useful 602 return result, log.Error(fmt.Errorf("failed to query chunks, %d values remaining: %s", valuesLeft, backoff.Err())) 603 } 604 return result, nil 605 } 606 607 func processChunkResponse(response *dynamodb.BatchGetItemOutput, chunksByKey map[string]chunk.Chunk) ([]chunk.Chunk, error) { 608 result := []chunk.Chunk{} 609 decodeContext := chunk.NewDecodeContext() 610 for _, items := range response.Responses { 611 for _, item := range items { 612 key, ok := item[hashKey] 613 if !ok || key == nil || key.S == nil { 614 return nil, fmt.Errorf("Got response from DynamoDB with no hash key: %+v", item) 615 } 616 617 chunk, ok := chunksByKey[*key.S] 618 if !ok { 619 return nil, fmt.Errorf("Got response from DynamoDB with chunk I didn't ask for: %s", *key.S) 620 } 621 622 buf, ok := item[valueKey] 623 if !ok || buf == nil || buf.B == nil { 624 return nil, fmt.Errorf("Got response from DynamoDB with no value: %+v", item) 625 } 626 627 if err := chunk.Decode(decodeContext, buf.B); err != nil { 628 return nil, err 629 } 630 631 result = append(result, chunk) 632 } 633 } 634 return result, nil 635 } 636 637 // PutChunkAndIndex implements chunk.ObjectAndIndexClient 638 // Combine both sets of writes before sending to DynamoDB, for performance 639 func (a dynamoDBStorageClient) PutChunkAndIndex(ctx context.Context, c chunk.Chunk, index chunk.WriteBatch) error { 640 dynamoDBWrites, err := a.writesForChunks([]chunk.Chunk{c}) 641 if err != nil { 642 return err 643 } 644 dynamoDBWrites.TakeReqs(index.(dynamoDBWriteBatch), 0) 645 return a.BatchWrite(ctx, dynamoDBWrites) 646 } 647 648 // PutChunks implements chunk.ObjectClient. 649 func (a dynamoDBStorageClient) PutChunks(ctx context.Context, chunks []chunk.Chunk) error { 650 dynamoDBWrites, err := a.writesForChunks(chunks) 651 if err != nil { 652 return err 653 } 654 return a.BatchWrite(ctx, dynamoDBWrites) 655 } 656 657 func (a dynamoDBStorageClient) writesForChunks(chunks []chunk.Chunk) (dynamoDBWriteBatch, error) { 658 var ( 659 dynamoDBWrites = dynamoDBWriteBatch{} 660 ) 661 662 for i := range chunks { 663 buf, err := chunks[i].Encoded() 664 if err != nil { 665 return nil, err 666 } 667 key := chunks[i].ExternalKey() 668 669 table, err := a.schemaCfg.ChunkTableFor(chunks[i].From) 670 if err != nil { 671 return nil, err 672 } 673 674 dynamoDBWrites.Add(table, key, placeholder, buf) 675 } 676 677 return dynamoDBWrites, nil 678 } 679 680 // Slice of values returned; map key is attribute name 681 type dynamoDBReadResponse struct { 682 items []map[string]*dynamodb.AttributeValue 683 } 684 685 func (b *dynamoDBReadResponse) Iterator() chunk.ReadBatchIterator { 686 return &dynamoDBReadResponseIterator{ 687 i: -1, 688 dynamoDBReadResponse: b, 689 } 690 } 691 692 type dynamoDBReadResponseIterator struct { 693 i int 694 *dynamoDBReadResponse 695 } 696 697 func (b *dynamoDBReadResponseIterator) Next() bool { 698 b.i++ 699 return b.i < len(b.items) 700 } 701 702 func (b *dynamoDBReadResponseIterator) RangeValue() []byte { 703 return b.items[b.i][rangeKey].B 704 } 705 706 func (b *dynamoDBReadResponseIterator) Value() []byte { 707 chunkValue, ok := b.items[b.i][valueKey] 708 if !ok { 709 return nil 710 } 711 return chunkValue.B 712 } 713 714 // map key is table name; value is a slice of things to 'put' 715 type dynamoDBWriteBatch map[string][]*dynamodb.WriteRequest 716 717 func (b dynamoDBWriteBatch) Len() int { 718 result := 0 719 for _, reqs := range b { 720 result += len(reqs) 721 } 722 return result 723 } 724 725 func (b dynamoDBWriteBatch) String() string { 726 var sb strings.Builder 727 sb.WriteByte('{') 728 for k, reqs := range b { 729 sb.WriteString(k) 730 sb.WriteString(": [") 731 for _, req := range reqs { 732 sb.WriteString(req.String()) 733 sb.WriteByte(',') 734 } 735 sb.WriteString("], ") 736 } 737 sb.WriteByte('}') 738 return sb.String() 739 } 740 741 func (b dynamoDBWriteBatch) Add(tableName, hashValue string, rangeValue []byte, value []byte) { 742 item := map[string]*dynamodb.AttributeValue{ 743 hashKey: {S: aws.String(hashValue)}, 744 rangeKey: {B: rangeValue}, 745 } 746 747 if value != nil { 748 item[valueKey] = &dynamodb.AttributeValue{B: value} 749 } 750 751 b[tableName] = append(b[tableName], &dynamodb.WriteRequest{ 752 PutRequest: &dynamodb.PutRequest{ 753 Item: item, 754 }, 755 }) 756 } 757 758 // Fill 'b' with WriteRequests from 'from' until 'b' has at most max requests. Remove those requests from 'from'. 759 func (b dynamoDBWriteBatch) TakeReqs(from dynamoDBWriteBatch, max int) { 760 outLen, inLen := b.Len(), from.Len() 761 toFill := inLen 762 if max > 0 { 763 toFill = util.Min(inLen, max-outLen) 764 } 765 for toFill > 0 { 766 for tableName, fromReqs := range from { 767 taken := util.Min(len(fromReqs), toFill) 768 if taken > 0 { 769 b[tableName] = append(b[tableName], fromReqs[:taken]...) 770 from[tableName] = fromReqs[taken:] 771 toFill -= taken 772 } 773 } 774 } 775 } 776 777 // map key is table name 778 type dynamoDBReadRequest map[string]*dynamodb.KeysAndAttributes 779 780 func (b dynamoDBReadRequest) Len() int { 781 result := 0 782 for _, reqs := range b { 783 result += len(reqs.Keys) 784 } 785 return result 786 } 787 788 func (b dynamoDBReadRequest) Add(tableName, hashValue string, rangeValue []byte) { 789 requests, ok := b[tableName] 790 if !ok { 791 requests = &dynamodb.KeysAndAttributes{ 792 AttributesToGet: []*string{ 793 aws.String(hashKey), 794 aws.String(valueKey), 795 }, 796 } 797 b[tableName] = requests 798 } 799 requests.Keys = append(requests.Keys, map[string]*dynamodb.AttributeValue{ 800 hashKey: {S: aws.String(hashValue)}, 801 rangeKey: {B: rangeValue}, 802 }) 803 } 804 805 // Fill 'b' with ReadRequests from 'from' until 'b' has at most max requests. Remove those requests from 'from'. 806 func (b dynamoDBReadRequest) TakeReqs(from dynamoDBReadRequest, max int) { 807 outLen, inLen := b.Len(), from.Len() 808 toFill := inLen 809 if max > 0 { 810 toFill = util.Min(inLen, max-outLen) 811 } 812 for toFill > 0 { 813 for tableName, fromReqs := range from { 814 taken := util.Min(len(fromReqs.Keys), toFill) 815 if taken > 0 { 816 if _, ok := b[tableName]; !ok { 817 b[tableName] = &dynamodb.KeysAndAttributes{ 818 AttributesToGet: []*string{ 819 aws.String(hashKey), 820 aws.String(valueKey), 821 }, 822 } 823 } 824 825 b[tableName].Keys = append(b[tableName].Keys, fromReqs.Keys[:taken]...) 826 from[tableName].Keys = fromReqs.Keys[taken:] 827 toFill -= taken 828 } 829 } 830 } 831 } 832 833 func recordDynamoError(tableName string, err error, operation string) { 834 if awsErr, ok := err.(awserr.Error); ok { 835 dynamoFailures.WithLabelValues(tableName, awsErr.Code(), operation).Add(float64(1)) 836 } else { 837 dynamoFailures.WithLabelValues(tableName, otherError, operation).Add(float64(1)) 838 } 839 } 840 841 // dynamoClientFromURL creates a new DynamoDB client from a URL. 842 func dynamoClientFromURL(awsURL *url.URL) (dynamodbiface.DynamoDBAPI, error) { 843 dynamoDBSession, err := awsSessionFromURL(awsURL) 844 if err != nil { 845 return nil, err 846 } 847 return dynamodb.New(dynamoDBSession), nil 848 } 849 850 // awsSessionFromURL creates a new aws session from a URL. 851 func awsSessionFromURL(awsURL *url.URL) (client.ConfigProvider, error) { 852 if awsURL == nil { 853 return nil, fmt.Errorf("no URL specified for DynamoDB") 854 } 855 path := strings.TrimPrefix(awsURL.Path, "/") 856 if len(path) > 0 { 857 level.Warn(util.Logger).Log("msg", "ignoring DynamoDB URL path", "path", path) 858 } 859 config, err := awscommon.ConfigFromURL(awsURL) 860 if err != nil { 861 return nil, err 862 } 863 config = config.WithMaxRetries(0) // We do our own retries, so we can monitor them 864 return session.New(config), nil 865 }