github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/kv/dynamodb/store.go (about) 1 package dynamodb 2 3 import ( 4 "bytes" 5 "context" 6 "errors" 7 "fmt" 8 "net/http" 9 "sort" 10 "sync" 11 "time" 12 13 "github.com/aws/aws-sdk-go-v2/aws" 14 "github.com/aws/aws-sdk-go-v2/aws/retry" 15 awshttp "github.com/aws/aws-sdk-go-v2/aws/transport/http" 16 "github.com/aws/aws-sdk-go-v2/config" 17 "github.com/aws/aws-sdk-go-v2/credentials" 18 "github.com/aws/aws-sdk-go-v2/feature/dynamodb/attributevalue" 19 "github.com/aws/aws-sdk-go-v2/feature/dynamodb/expression" 20 "github.com/aws/aws-sdk-go-v2/service/dynamodb" 21 "github.com/aws/aws-sdk-go-v2/service/dynamodb/types" 22 "github.com/treeverse/lakefs/pkg/kv" 23 "github.com/treeverse/lakefs/pkg/kv/kvparams" 24 "github.com/treeverse/lakefs/pkg/logging" 25 ) 26 27 type Driver struct{} 28 29 type Store struct { 30 svc *dynamodb.Client 31 params *kvparams.DynamoDB 32 wg sync.WaitGroup 33 logger logging.Logger 34 cancel chan bool 35 } 36 37 type EntriesIterator struct { 38 partitionKey []byte 39 startKey []byte 40 exclusiveStartKey map[string]types.AttributeValue 41 42 scanCtx context.Context 43 entry *kv.Entry 44 err error 45 store *Store 46 queryResult *dynamodb.QueryOutput 47 currEntryIdx int 48 limit int64 49 } 50 51 type DynKVItem struct { 52 PartitionKey []byte 53 ItemKey []byte 54 ItemValue []byte 55 } 56 57 const ( 58 DriverName = "dynamodb" 59 60 PartitionKey = "PartitionKey" 61 ItemKey = "ItemKey" 62 ItemValue = "ItemValue" 63 ) 64 65 //nolint:gochecknoinits 66 func init() { 67 kv.Register(DriverName, &Driver{}) 68 } 69 70 // Open - opens and returns a KV store over DynamoDB. This function creates the DB session 71 // and sets up the KV table. 72 func (d *Driver) Open(ctx context.Context, kvParams kvparams.Config) (kv.Store, error) { 73 params := kvParams.DynamoDB 74 if params == nil { 75 return nil, fmt.Errorf("missing %s settings: %w", DriverName, kv.ErrDriverConfiguration) 76 } 77 78 var opts []func(*config.LoadOptions) error 79 if params.AwsRegion != "" { 80 opts = append(opts, config.WithRegion(params.AwsRegion)) 81 } 82 if params.AwsProfile != "" { 83 opts = append(opts, 84 config.WithSharedConfigProfile(params.AwsProfile)) 85 } 86 if params.AwsAccessKeyID != "" { 87 opts = append(opts, config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider( 88 params.AwsAccessKeyID, 89 params.AwsSecretAccessKey, 90 "", 91 ))) 92 } 93 const credsCacheExpiryWindow = 30 * time.Second 94 const credsCacheExpiryWindowJitterFrac = 0.5 95 opts = append(opts, config.WithHTTPClient( 96 awshttp.NewBuildableClient().WithTransportOptions(func(transport *http.Transport) { 97 transport.MaxConnsPerHost = params.MaxConnectionsPerHost 98 })), 99 config.WithRetryer(func() aws.Retryer { 100 return retry.NewStandard(func(so *retry.StandardOptions) { 101 so.RateLimiter = &NopRateLimiter{} 102 so.MaxAttempts = params.MaxAttempts 103 }) 104 }), 105 config.WithCredentialsCacheOptions(func(options *aws.CredentialsCacheOptions) { 106 options.ExpiryWindow = credsCacheExpiryWindow 107 options.ExpiryWindowJitterFrac = credsCacheExpiryWindowJitterFrac 108 }), 109 ) 110 111 cfg, err := config.LoadDefaultConfig(ctx, opts...) 112 if err != nil { 113 return nil, err 114 } 115 116 // Create DynamoDB client 117 svc := dynamodb.NewFromConfig(cfg, func(o *dynamodb.Options) { 118 if params.Endpoint != "" { 119 o.BaseEndpoint = ¶ms.Endpoint 120 } 121 }) 122 123 // Create table if not exists. 124 // To avoid potential errors in restricted environments, we confirmed the existence of the table beforehand. 125 success, _ := isTableExist(ctx, svc, params.TableName) 126 if !success { 127 err := setupKeyValueDatabase(ctx, svc, params) 128 if err != nil { 129 return nil, fmt.Errorf("%w: %s", kv.ErrSetupFailed, err) 130 } 131 } 132 133 logger := logging.FromContext(ctx).WithField("store", DriverName) 134 s := &Store{ 135 svc: svc, 136 params: params, 137 logger: logger, 138 cancel: make(chan bool), 139 } 140 141 s.StartPeriodicCheck() 142 return s, nil 143 } 144 145 // isTableExist will try to describeTable and return bool status, error is returned only in case err != ResourceNotFoundException 146 func isTableExist(ctx context.Context, svc *dynamodb.Client, table string) (bool, error) { 147 _, err := svc.DescribeTable(ctx, &dynamodb.DescribeTableInput{ 148 TableName: aws.String(table), 149 }) 150 if err != nil { 151 var errResNotFound *types.ResourceNotFoundException 152 if errors.As(err, &errResNotFound) { 153 return false, nil 154 } 155 return false, err 156 } 157 return true, nil 158 } 159 160 // setupKeyValueDatabase setup everything required to enable kv over postgres 161 func setupKeyValueDatabase(ctx context.Context, svc *dynamodb.Client, params *kvparams.DynamoDB) error { 162 log := logging.FromContext(ctx).WithField("table_name", params.TableName) 163 start := time.Now() 164 defer func() { 165 log.WithField("took", fmt.Sprint(time.Since(start))).Info("Setup time") 166 }() 167 168 // main kv table 169 _, err := svc.CreateTable(ctx, &dynamodb.CreateTableInput{ 170 TableName: aws.String(params.TableName), 171 BillingMode: types.BillingModePayPerRequest, // On-Demand 172 AttributeDefinitions: []types.AttributeDefinition{ 173 { 174 AttributeName: aws.String(PartitionKey), 175 AttributeType: types.ScalarAttributeTypeB, 176 }, 177 { 178 AttributeName: aws.String(ItemKey), 179 AttributeType: types.ScalarAttributeTypeB, 180 }, 181 }, 182 KeySchema: []types.KeySchemaElement{ 183 { 184 AttributeName: aws.String(PartitionKey), 185 KeyType: types.KeyTypeHash, 186 }, 187 { 188 AttributeName: aws.String(ItemKey), 189 KeyType: types.KeyTypeRange, 190 }, 191 }, 192 }) 193 if err != nil { 194 var errResInUse *types.ResourceInUseException 195 if errors.As(err, &errResInUse) { 196 log.Info("KV table exists") 197 return nil 198 } 199 log.WithError(err).Warn("Failed to create or detect KV table") 200 return err 201 } 202 203 const ( 204 // Wait for ~30 seconds, at a nearly constant rate 205 minDelay = 750 * time.Millisecond 206 maxDelay = 3 * time.Second 207 maxWait = 30 * time.Second 208 ) 209 210 waiter := dynamodb.NewTableExistsWaiter(svc, func(o *dynamodb.TableExistsWaiterOptions) { 211 // override minimum delay to 10 seconds 212 o.MinDelay = minDelay 213 o.MaxDelay = maxDelay 214 }) 215 216 input := &dynamodb.DescribeTableInput{TableName: aws.String(params.TableName)} 217 err = waiter.Wait(ctx, input, maxWait) 218 219 return err 220 } 221 222 func (s *Store) bytesKeyToDynamoKey(partitionKey, key []byte) map[string]types.AttributeValue { 223 return map[string]types.AttributeValue{ 224 PartitionKey: &types.AttributeValueMemberB{ 225 Value: partitionKey, 226 }, 227 ItemKey: &types.AttributeValueMemberB{ 228 Value: key, 229 }, 230 } 231 } 232 233 func (s *Store) Get(ctx context.Context, partitionKey, key []byte) (*kv.ValueWithPredicate, error) { 234 if len(partitionKey) == 0 { 235 return nil, kv.ErrMissingPartitionKey 236 } 237 if len(key) == 0 { 238 return nil, kv.ErrMissingKey 239 } 240 result, err := s.svc.GetItem(ctx, &dynamodb.GetItemInput{ 241 TableName: aws.String(s.params.TableName), 242 Key: s.bytesKeyToDynamoKey(partitionKey, key), 243 ConsistentRead: aws.Bool(true), 244 ReturnConsumedCapacity: types.ReturnConsumedCapacityTotal, 245 }) 246 const operation = "GetItem" 247 if err != nil { 248 if s.isSlowDownErr(err) { 249 s.logger.WithField("partition_key", partitionKey).WithContext(ctx).Error("get item: %w", kv.ErrSlowDown) 250 dynamoSlowdown.WithLabelValues(operation).Inc() 251 err = errors.Join(err, kv.ErrSlowDown) 252 } 253 return nil, fmt.Errorf("get item: %w", err) 254 } 255 if result.ConsumedCapacity != nil { 256 dynamoConsumedCapacity.WithLabelValues(operation).Add(*result.ConsumedCapacity.CapacityUnits) 257 } 258 259 if result.Item == nil { 260 return nil, kv.ErrNotFound 261 } 262 263 var item DynKVItem 264 err = attributevalue.UnmarshalMap(result.Item, &item) 265 if err != nil { 266 return nil, fmt.Errorf("unmarshal map: %w", err) 267 } 268 269 return &kv.ValueWithPredicate{ 270 Value: item.ItemValue, 271 Predicate: kv.Predicate(item.ItemValue), 272 }, nil 273 } 274 275 func (s *Store) Set(ctx context.Context, partitionKey, key, value []byte) error { 276 return s.setWithOptionalPredicate(ctx, partitionKey, key, value, nil, false) 277 } 278 279 func (s *Store) SetIf(ctx context.Context, partitionKey, key, value []byte, valuePredicate kv.Predicate) error { 280 return s.setWithOptionalPredicate(ctx, partitionKey, key, value, valuePredicate, true) 281 } 282 283 func (s *Store) setWithOptionalPredicate(ctx context.Context, partitionKey, key, value []byte, valuePredicate kv.Predicate, usePredicate bool) error { 284 if len(partitionKey) == 0 { 285 return kv.ErrMissingPartitionKey 286 } 287 if len(key) == 0 { 288 return kv.ErrMissingKey 289 } 290 if value == nil { 291 return kv.ErrMissingValue 292 } 293 294 item := DynKVItem{ 295 PartitionKey: partitionKey, 296 ItemKey: key, 297 ItemValue: value, 298 } 299 300 marshaledItem, err := attributevalue.MarshalMap(item) 301 if err != nil { 302 return fmt.Errorf("marshal map: %w", err) 303 } 304 305 input := &dynamodb.PutItemInput{ 306 Item: marshaledItem, 307 TableName: &s.params.TableName, 308 ReturnConsumedCapacity: types.ReturnConsumedCapacityTotal, 309 } 310 if usePredicate { 311 switch valuePredicate { 312 case nil: // Set only if not exists 313 input.ConditionExpression = aws.String("attribute_not_exists(" + ItemValue + ")") 314 315 case kv.PrecondConditionalExists: // update only if exists 316 input.ConditionExpression = aws.String("attribute_exists(" + ItemValue + ")") 317 318 default: // update only if predicate matches the current stored value 319 predicateCondition := expression.Name(ItemValue).Equal(expression.Value(valuePredicate.([]byte))) 320 conditionExpression, err := expression.NewBuilder().WithCondition(predicateCondition).Build() 321 if err != nil { 322 return fmt.Errorf("build condition expression: %w", err) 323 } 324 input.ExpressionAttributeNames = conditionExpression.Names() 325 input.ExpressionAttributeValues = conditionExpression.Values() 326 input.ConditionExpression = conditionExpression.Condition() 327 } 328 } 329 330 resp, err := s.svc.PutItem(ctx, input) 331 const operation = "PutItem" 332 if err != nil { 333 var errConditionalCheckFailed *types.ConditionalCheckFailedException 334 if usePredicate && errors.As(err, &errConditionalCheckFailed) { 335 return kv.ErrPredicateFailed 336 } 337 if s.isSlowDownErr(err) { 338 s.logger.WithField("partition_key", partitionKey).WithContext(ctx).Error("put item: %w", kv.ErrSlowDown) 339 dynamoSlowdown.WithLabelValues(operation).Inc() 340 err = errors.Join(err, kv.ErrSlowDown) 341 } 342 return fmt.Errorf("put item: %w", err) 343 } 344 if resp.ConsumedCapacity != nil { 345 dynamoConsumedCapacity.WithLabelValues(operation).Add(*resp.ConsumedCapacity.CapacityUnits) 346 } 347 return nil 348 } 349 350 func (s *Store) Delete(ctx context.Context, partitionKey, key []byte) error { 351 if len(partitionKey) == 0 { 352 return kv.ErrMissingPartitionKey 353 } 354 if len(key) == 0 { 355 return kv.ErrMissingKey 356 } 357 358 resp, err := s.svc.DeleteItem(ctx, &dynamodb.DeleteItemInput{ 359 TableName: aws.String(s.params.TableName), 360 Key: s.bytesKeyToDynamoKey(partitionKey, key), 361 ReturnConsumedCapacity: types.ReturnConsumedCapacityTotal, 362 }) 363 const operation = "DeleteItem" 364 if err != nil { 365 if s.isSlowDownErr(err) { 366 s.logger.WithField("partition_key", partitionKey).WithContext(ctx).Error("delete item: %w", kv.ErrSlowDown) 367 dynamoSlowdown.WithLabelValues(operation).Inc() 368 err = errors.Join(err, kv.ErrSlowDown) 369 } 370 return fmt.Errorf("delete item: %w", err) 371 } 372 if resp.ConsumedCapacity != nil { 373 dynamoConsumedCapacity.WithLabelValues(operation).Add(*resp.ConsumedCapacity.CapacityUnits) 374 } 375 return nil 376 } 377 378 func (s *Store) Scan(ctx context.Context, partitionKey []byte, options kv.ScanOptions) (kv.EntriesIterator, error) { 379 if len(partitionKey) == 0 { 380 return nil, kv.ErrMissingPartitionKey 381 } 382 // limit set to the minimum 'params.ScanLimit' and 'options.BatchSize', unless 0 (not set) 383 limit := s.params.ScanLimit 384 batchSize := int64(options.BatchSize) 385 if batchSize != 0 && limit != 0 && batchSize < limit { 386 limit = batchSize 387 } 388 it := &EntriesIterator{ 389 partitionKey: partitionKey, 390 startKey: options.KeyStart, 391 scanCtx: ctx, 392 store: s, 393 limit: limit, 394 } 395 it.runQuery() 396 if it.err != nil { 397 err := it.err 398 if s.isSlowDownErr(it.err) { 399 s.logger.WithField("partition_key", partitionKey).WithContext(ctx).Error("scan: %w", kv.ErrSlowDown) 400 dynamoSlowdown.WithLabelValues("Scan").Inc() 401 err = errors.Join(err, kv.ErrSlowDown) 402 } 403 return nil, err 404 } 405 return it, nil 406 } 407 408 func (s *Store) Close() { 409 s.StopPeriodicCheck() 410 } 411 412 // DropTable used internally for testing purposes 413 func (s *Store) DropTable() error { 414 ctx := context.Background() 415 _, err := s.svc.DeleteTable(ctx, &dynamodb.DeleteTableInput{ 416 TableName: &s.params.TableName, 417 }) 418 if s.isSlowDownErr(err) { 419 s.logger.WithField("table", s.params.TableName).WithContext(ctx).Error("drop table: %w", kv.ErrSlowDown) 420 dynamoSlowdown.WithLabelValues("DeleteTable").Inc() 421 err = errors.Join(err, kv.ErrSlowDown) 422 } 423 return err 424 } 425 426 func (e *EntriesIterator) SeekGE(key []byte) { 427 if !e.isInRange(key) { 428 e.startKey = key 429 e.exclusiveStartKey = nil 430 e.runQuery() 431 return 432 } 433 var item DynKVItem 434 e.currEntryIdx = sort.Search(len(e.queryResult.Items), func(i int) bool { 435 if e.err = attributevalue.UnmarshalMap(e.queryResult.Items[i], &item); e.err != nil { 436 return false 437 } 438 return bytes.Compare(key, item.ItemKey) <= 0 439 }) 440 if e.currEntryIdx == -1 { 441 // not found, set to the end 442 e.currEntryIdx = len(e.queryResult.Items) 443 } 444 } 445 446 func (e *EntriesIterator) Next() bool { 447 if e.err != nil { 448 return false 449 } 450 // check if we reached the end of the current queryResult, this can be called twice in case runQuery returned an empty result 451 for e.currEntryIdx == len(e.queryResult.Items) { 452 if e.queryResult.LastEvaluatedKey == nil { 453 return false 454 } 455 e.exclusiveStartKey = e.queryResult.LastEvaluatedKey 456 e.runQuery() 457 if e.err != nil { 458 return false 459 } 460 } 461 var item DynKVItem 462 e.err = attributevalue.UnmarshalMap(e.queryResult.Items[e.currEntryIdx], &item) 463 if e.err != nil { 464 return false 465 } 466 e.entry = &kv.Entry{ 467 Key: item.ItemKey, 468 Value: item.ItemValue, 469 } 470 e.currEntryIdx++ 471 return true 472 } 473 474 func (e *EntriesIterator) Entry() *kv.Entry { 475 return e.entry 476 } 477 478 func (e *EntriesIterator) Err() error { 479 return e.err 480 } 481 482 func (e *EntriesIterator) Close() { 483 e.err = kv.ErrClosedEntries 484 } 485 486 func (e *EntriesIterator) runQuery() { 487 expressionAttributeValues := map[string]types.AttributeValue{ 488 ":partitionkey": &types.AttributeValueMemberB{ 489 Value: e.partitionKey, 490 }, 491 } 492 keyConditionExpression := PartitionKey + " = :partitionkey" 493 if len(e.startKey) > 0 { 494 keyConditionExpression += " AND " + ItemKey + " >= :fromkey" 495 expressionAttributeValues[":fromkey"] = &types.AttributeValueMemberB{ 496 Value: e.startKey, 497 } 498 } 499 queryInput := &dynamodb.QueryInput{ 500 TableName: aws.String(e.store.params.TableName), 501 KeyConditionExpression: aws.String(keyConditionExpression), 502 ExpressionAttributeValues: expressionAttributeValues, 503 ConsistentRead: aws.Bool(true), 504 ScanIndexForward: aws.Bool(true), 505 ExclusiveStartKey: e.exclusiveStartKey, 506 ReturnConsumedCapacity: types.ReturnConsumedCapacityTotal, 507 } 508 if e.limit != 0 { 509 queryInput.Limit = aws.Int32(int32(e.limit)) 510 } 511 512 queryResult, err := e.store.svc.Query(e.scanCtx, queryInput) 513 const operation = "Query" 514 if err != nil { 515 if e.store.isSlowDownErr(err) { 516 e.store.logger.WithField("partition_key", e.partitionKey).WithContext(e.scanCtx).Error("query: %w", kv.ErrSlowDown) 517 dynamoSlowdown.WithLabelValues("query").Inc() 518 err = errors.Join(err, kv.ErrSlowDown) 519 } 520 e.err = fmt.Errorf("query: %w", err) 521 return 522 } 523 if queryResult.ConsumedCapacity != nil { 524 dynamoConsumedCapacity.WithLabelValues(operation).Add(*queryResult.ConsumedCapacity.CapacityUnits) 525 } 526 e.queryResult = queryResult 527 e.currEntryIdx = 0 528 } 529 530 // isInRange checks if key falls within the range of keys on the queryResult. 531 // To optimize range checking: 532 // - If the current queryResult is a result of a seek operation with exclusiveStartKey use exclusiveStartKey as the minKey otherwise use e.startKey as the minKey. 533 // - Use LastEvaluatedKey as the Max value, in case LastEvaluatedKey is nil all keys greater than the minimum key are considered in range. 534 // This function returns true if e.startKey is within these defined range criteria. 535 func (e *EntriesIterator) isInRange(key []byte) bool { 536 minKey := e.startKey 537 if e.exclusiveStartKey != nil { 538 var minItem DynKVItem 539 e.err = attributevalue.UnmarshalMap(e.exclusiveStartKey, &minItem) 540 if e.err != nil { 541 return false 542 } 543 minKey = minItem.ItemKey 544 } 545 if bytes.Compare(key, minKey) < 0 { 546 return false 547 } 548 if e.queryResult.LastEvaluatedKey == nil { 549 // evaluated all -> all keys greater than minKey are in range 550 return true 551 } 552 var maxItem DynKVItem 553 e.err = attributevalue.UnmarshalMap(e.queryResult.LastEvaluatedKey, &maxItem) 554 if e.err != nil { 555 return false 556 } 557 return bytes.Compare(key, maxItem.ItemKey) <= 0 558 } 559 560 // StartPeriodicCheck performs one check and continues every 'interval' in the background 561 func (s *Store) StartPeriodicCheck() { 562 interval := s.params.HealthCheckInterval 563 if interval <= 0 { 564 return 565 } 566 s.wg.Add(1) 567 go func() { 568 defer s.wg.Done() 569 s.logger.WithField("interval", interval).Debug("Starting DynamoDB health check") 570 // check first and loop for checking every interval 571 s.Check() 572 ticker := time.NewTicker(interval) 573 defer ticker.Stop() 574 for { 575 select { 576 case <-ticker.C: 577 s.Check() 578 case <-s.cancel: 579 return 580 } 581 } 582 }() 583 } 584 585 func (s *Store) Check() { 586 log := s.logger.WithField("store_type", DriverName) 587 success, err := isTableExist(context.Background(), s.svc, s.params.TableName) 588 if success { 589 log.Debug("DynamoDB health check passed!") 590 } else { 591 log.WithError(err).Debug("DynamoDB health check failed") 592 } 593 } 594 595 func (s *Store) StopPeriodicCheck() { 596 if s.cancel != nil { 597 close(s.cancel) 598 s.wg.Wait() 599 s.cancel = nil 600 } 601 } 602 603 func (s *Store) isSlowDownErr(err error) bool { 604 for _, te := range retry.DefaultThrottles { 605 if te.IsErrorThrottle(err).Bool() { 606 return true 607 } 608 } 609 return false 610 }