github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/block/gs/walker.go (about) 1 package gs 2 3 import ( 4 "context" 5 "encoding/hex" 6 "errors" 7 "fmt" 8 "net/url" 9 "strings" 10 11 "cloud.google.com/go/storage" 12 "github.com/treeverse/lakefs/pkg/block" 13 "google.golang.org/api/iterator" 14 ) 15 16 type GCSWalker struct { 17 client *storage.Client 18 mark block.Mark 19 } 20 21 func NewGCSWalker(client *storage.Client) *GCSWalker { 22 return &GCSWalker{client: client} 23 } 24 25 func (w *GCSWalker) Walk(ctx context.Context, storageURI *url.URL, op block.WalkOptions, walkFn func(e block.ObjectStoreEntry) error) error { 26 prefix := strings.TrimLeft(storageURI.Path, "/") 27 var basePath string 28 if idx := strings.LastIndex(prefix, "/"); idx != -1 { 29 basePath = prefix[:idx+1] 30 } 31 iter := w.client. 32 Bucket(storageURI.Host). 33 Objects(ctx, &storage.Query{ 34 Prefix: prefix, 35 StartOffset: op.After, 36 }) 37 38 for { 39 attrs, err := iter.Next() 40 if errors.Is(err, iterator.Done) { 41 break 42 } 43 if err != nil { 44 return fmt.Errorf("error listing objects at storage uri %s: %w", storageURI, err) 45 } 46 47 // skipping first key (without forgetting the possible empty string key!) 48 if op.After != "" && attrs.Name <= op.After { 49 continue 50 } 51 52 w.mark = block.Mark{ 53 LastKey: attrs.Name, 54 HasMore: true, 55 } 56 if err := walkFn(block.ObjectStoreEntry{ 57 FullKey: attrs.Name, 58 RelativeKey: strings.TrimPrefix(attrs.Name, basePath), 59 Address: fmt.Sprintf("gs://%s/%s", attrs.Bucket, attrs.Name), 60 ETag: hex.EncodeToString(attrs.MD5), 61 Mtime: attrs.Updated, 62 Size: attrs.Size, 63 }); err != nil { 64 return err 65 } 66 } 67 w.mark = block.Mark{ 68 LastKey: "", 69 HasMore: false, 70 } 71 72 return nil 73 } 74 75 func (w *GCSWalker) Marker() block.Mark { 76 return w.mark 77 } 78 79 func (w *GCSWalker) GetSkippedEntries() []block.ObjectStoreEntry { 80 return nil 81 }