github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/block/s3/walker.go (about) 1 package s3 2 3 import ( 4 "context" 5 "fmt" 6 "net/url" 7 "strings" 8 9 "github.com/aws/aws-sdk-go-v2/aws" 10 "github.com/aws/aws-sdk-go-v2/service/s3" 11 "github.com/treeverse/lakefs/pkg/block" 12 ) 13 14 type Walker struct { 15 client *s3.Client 16 mark block.Mark 17 } 18 19 func NewS3Walker(client *s3.Client) *Walker { 20 return &Walker{ 21 client: client, 22 mark: block.Mark{HasMore: true}, 23 } 24 } 25 26 func (s *Walker) Walk(ctx context.Context, storageURI *url.URL, op block.WalkOptions, walkFn func(e block.ObjectStoreEntry) error) error { 27 var continuation *string 28 const maxKeys = 1000 29 prefix := strings.TrimLeft(storageURI.Path, "/") 30 31 // basePath is the path relative to which the walk is done. The key of the resulting entries will be relative to this path. 32 // As the original prefix might not end with a separator, it cannot be used for the 33 // trim purpose, as this will create partial "folder" names. When the basePath is 34 // trimmed from the key, the remains will be the object name. 35 // Example: 36 // Say we have the following keys: 37 // pref/object 38 // pref/obj/another 39 // If we specify prefix="pref/obj" (both keys will be listed) then basePath="pref/" and the trim result 40 // for the keys will be: 41 // object 42 // obj/another 43 var basePath string 44 if idx := strings.LastIndex(prefix, "/"); idx != -1 { 45 basePath = prefix[:idx+1] 46 } 47 bucket := storageURI.Host 48 for { 49 result, err := s.client.ListObjectsV2(ctx, &s3.ListObjectsV2Input{ 50 Bucket: aws.String(bucket), 51 ContinuationToken: continuation, 52 MaxKeys: aws.Int32(maxKeys), 53 Prefix: aws.String(prefix), 54 StartAfter: aws.String(op.After), 55 }) 56 if continuation != nil { 57 s.mark.ContinuationToken = *continuation 58 } 59 if err != nil { 60 return err 61 } 62 for _, record := range result.Contents { 63 key := aws.ToString(record.Key) 64 addr := fmt.Sprintf("s3://%s/%s", bucket, key) 65 ent := block.ObjectStoreEntry{ 66 FullKey: key, 67 RelativeKey: strings.TrimPrefix(key, basePath), 68 Address: addr, 69 ETag: strings.Trim(aws.ToString(record.ETag), "\""), 70 Mtime: aws.ToTime(record.LastModified), 71 Size: aws.ToInt64(record.Size), 72 } 73 s.mark.LastKey = key 74 err := walkFn(ent) 75 if err != nil { 76 return err 77 } 78 } 79 if !aws.ToBool(result.IsTruncated) { 80 break 81 } 82 continuation = result.NextContinuationToken 83 } 84 s.mark = block.Mark{ 85 LastKey: "", 86 HasMore: false, 87 } 88 return nil 89 } 90 91 func (s *Walker) Marker() block.Mark { 92 return s.mark 93 } 94 95 func (s *Walker) GetSkippedEntries() []block.ObjectStoreEntry { 96 return nil 97 }