github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/block/s3/walker.go (about)

     1  package s3
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"net/url"
     7  	"strings"
     8  
     9  	"github.com/aws/aws-sdk-go-v2/aws"
    10  	"github.com/aws/aws-sdk-go-v2/service/s3"
    11  	"github.com/treeverse/lakefs/pkg/block"
    12  )
    13  
// Walker lists the objects of an S3 bucket through an s3.Client and keeps a
// block.Mark describing how far the most recent Walk has progressed.
type Walker struct {
	// client is the S3 client used by Walk to issue ListObjectsV2 requests.
	client *s3.Client
	// mark is the progress marker updated by Walk and returned by Marker.
	mark   block.Mark
}
    18  
    19  func NewS3Walker(client *s3.Client) *Walker {
    20  	return &Walker{
    21  		client: client,
    22  		mark:   block.Mark{HasMore: true},
    23  	}
    24  }
    25  
    26  func (s *Walker) Walk(ctx context.Context, storageURI *url.URL, op block.WalkOptions, walkFn func(e block.ObjectStoreEntry) error) error {
    27  	var continuation *string
    28  	const maxKeys = 1000
    29  	prefix := strings.TrimLeft(storageURI.Path, "/")
    30  
    31  	// basePath is the path relative to which the walk is done. The key of the resulting entries will be relative to this path.
    32  	// As the original prefix might not end with a separator, it cannot be used for the
    33  	// trim purpose, as this will create partial "folder" names. When the basePath is
    34  	// trimmed from the key, the remains will be the object name.
    35  	// Example:
    36  	// Say we have the following keys:
    37  	// pref/object
    38  	// pref/obj/another
    39  	// If we specify prefix="pref/obj" (both keys will be listed) then basePath="pref/" and the trim result
    40  	// for the keys will be:
    41  	// object
    42  	// obj/another
    43  	var basePath string
    44  	if idx := strings.LastIndex(prefix, "/"); idx != -1 {
    45  		basePath = prefix[:idx+1]
    46  	}
    47  	bucket := storageURI.Host
    48  	for {
    49  		result, err := s.client.ListObjectsV2(ctx, &s3.ListObjectsV2Input{
    50  			Bucket:            aws.String(bucket),
    51  			ContinuationToken: continuation,
    52  			MaxKeys:           aws.Int32(maxKeys),
    53  			Prefix:            aws.String(prefix),
    54  			StartAfter:        aws.String(op.After),
    55  		})
    56  		if continuation != nil {
    57  			s.mark.ContinuationToken = *continuation
    58  		}
    59  		if err != nil {
    60  			return err
    61  		}
    62  		for _, record := range result.Contents {
    63  			key := aws.ToString(record.Key)
    64  			addr := fmt.Sprintf("s3://%s/%s", bucket, key)
    65  			ent := block.ObjectStoreEntry{
    66  				FullKey:     key,
    67  				RelativeKey: strings.TrimPrefix(key, basePath),
    68  				Address:     addr,
    69  				ETag:        strings.Trim(aws.ToString(record.ETag), "\""),
    70  				Mtime:       aws.ToTime(record.LastModified),
    71  				Size:        aws.ToInt64(record.Size),
    72  			}
    73  			s.mark.LastKey = key
    74  			err := walkFn(ent)
    75  			if err != nil {
    76  				return err
    77  			}
    78  		}
    79  		if !aws.ToBool(result.IsTruncated) {
    80  			break
    81  		}
    82  		continuation = result.NextContinuationToken
    83  	}
    84  	s.mark = block.Mark{
    85  		LastKey: "",
    86  		HasMore: false,
    87  	}
    88  	return nil
    89  }
    90  
// Marker returns a copy of the walker's current mark. Walk updates the mark as
// it progresses (last key passed to walkFn, continuation token of the latest
// request) and resets it to an empty key with HasMore=false once a walk
// completes successfully.
func (s *Walker) Marker() block.Mark {
	return s.mark
}
    94  
// GetSkippedEntries always returns nil: this walker does not record any
// skipped entries.
func (s *Walker) GetSkippedEntries() []block.ObjectStoreEntry {
	return nil
}