github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/file/s3file/list.go (about)

     1  package s3file
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"strings"
     7  
     8  	"github.com/aws/aws-sdk-go/aws"
     9  	"github.com/aws/aws-sdk-go/service/s3"
    10  	"github.com/Schaudge/grailbase/file"
    11  	"github.com/Schaudge/grailbase/log"
    12  )
    13  
    14  // List implements file.Implementation interface.
    15  func (impl *s3Impl) List(ctx context.Context, dir string, recurse bool) file.Lister {
    16  	scheme, bucket, key, err := ParseURL(dir)
    17  	if err != nil {
    18  		return &s3Lister{ctx: ctx, dir: dir, err: err}
    19  	}
    20  	if bucket == "" {
    21  		if recurse {
    22  			return &s3Lister{ctx: ctx, dir: dir,
    23  				err: fmt.Errorf("list %s: ListBuckets cannot be combined with recurse option", dir)}
    24  		}
    25  		clients, clientsErr := impl.clientsForAction(ctx, "ListAllMyBuckets", bucket, key)
    26  		if clientsErr != nil {
    27  			return &s3Lister{ctx: ctx, dir: dir, err: clientsErr}
    28  		}
    29  		return &s3BucketLister{
    30  			ctx:     ctx,
    31  			scheme:  scheme,
    32  			clients: clients,
    33  		}
    34  	}
    35  	clients, err := impl.clientsForAction(ctx, "ListBucket", bucket, key)
    36  	if err != nil {
    37  		return &s3Lister{ctx: ctx, dir: dir, err: err}
    38  	}
    39  	return &s3Lister{
    40  		ctx:     ctx,
    41  		policy:  newBackoffPolicy(clients, file.Opts{}),
    42  		dir:     dir,
    43  		scheme:  scheme,
    44  		bucket:  bucket,
    45  		prefix:  key,
    46  		recurse: recurse,
    47  	}
    48  }
    49  
    50  type s3Lister struct {
    51  	ctx                         context.Context
    52  	policy                      retryPolicy
    53  	dir, scheme, bucket, prefix string
    54  
    55  	object  s3Obj
    56  	objects []s3Obj
    57  	token   *string
    58  	err     error
    59  	done    bool
    60  	recurse bool
    61  
    62  	// consecutiveEmptyResponses counts how many times S3's ListObjectsV2WithContext returned
    63  	// 0 records (either contents or common prefixes) consecutively.
    64  	// Many empty responses would cause Scan to appear to hang, so we log a warning.
    65  	consecutiveEmptyResponses int
    66  }
    67  
// s3Obj is one entry produced by a listing: either a real object or a
// pseudo-directory derived from a common prefix.
type s3Obj struct {
	// obj is the object as returned by S3; nil for a pseudo-directory.
	obj *s3.Object
	// cp is the pseudo-directory name. Scan stores the common prefix here after
	// dropping its trailing path separator.
	cp *string
}
    72  
    73  func (o s3Obj) name() string {
    74  	if o.obj == nil {
    75  		return *o.cp
    76  	}
    77  	return *o.obj.Key
    78  }
    79  
    80  // Scan implements Lister.Scan
    81  func (l *s3Lister) Scan() bool {
    82  	for {
    83  		if l.err != nil {
    84  			return false
    85  		}
    86  		l.err = l.ctx.Err()
    87  		if l.err != nil {
    88  			return false
    89  		}
    90  		if len(l.objects) > 0 {
    91  			l.object, l.objects = l.objects[0], l.objects[1:]
    92  			ll := len(l.prefix)
    93  			// Ignore keys whose path component isn't exactly equal to l.prefix.  For
    94  			// example, if l.prefix="foo/bar", then we yield "foo/bar" and
    95  			// "foo/bar/baz", but not "foo/barbaz".
    96  			keyVal := l.object.name()
    97  			if ll > 0 && len(keyVal) > ll {
    98  				if l.prefix[ll-1] == '/' {
    99  					// Treat prefix "foo/bar/" as "foo/bar".
   100  					ll--
   101  				}
   102  				if keyVal[ll] != '/' {
   103  					continue
   104  				}
   105  			}
   106  			return true
   107  		}
   108  		if l.done {
   109  			return false
   110  		}
   111  
   112  		var prefix string
   113  		if l.showDirs() && !strings.HasSuffix(l.prefix, pathSeparator) && l.prefix != "" {
   114  			prefix = l.prefix + pathSeparator
   115  		} else {
   116  			prefix = l.prefix
   117  		}
   118  
   119  		req := &s3.ListObjectsV2Input{
   120  			Bucket:            aws.String(l.bucket),
   121  			ContinuationToken: l.token,
   122  			Prefix:            aws.String(prefix),
   123  		}
   124  
   125  		if l.showDirs() {
   126  			req.Delimiter = aws.String(pathSeparator)
   127  		}
   128  		var ids s3RequestIDs
   129  		res, err := l.policy.client().ListObjectsV2WithContext(l.ctx, req, ids.captureOption())
   130  		if l.policy.shouldRetry(l.ctx, err, l.dir) {
   131  			continue
   132  		}
   133  		if err != nil {
   134  			l.err = annotate(err, ids, &l.policy, fmt.Sprintf("s3file.list s3://%s/%s", l.bucket, l.prefix))
   135  			return false
   136  		}
   137  		l.token = res.NextContinuationToken
   138  		nRecords := len(res.Contents)
   139  		if l.showDirs() {
   140  			nRecords += len(res.CommonPrefixes)
   141  		}
   142  		if nRecords > 0 {
   143  			l.consecutiveEmptyResponses = 0
   144  		} else {
   145  			l.consecutiveEmptyResponses++
   146  			if n := l.consecutiveEmptyResponses; n > 7 && n&(n-1) == 0 {
   147  				log.Printf("s3file.list.scan: warning: S3 returned empty response %d consecutive times", n)
   148  			}
   149  		}
   150  		l.objects = make([]s3Obj, 0, nRecords)
   151  		for _, objVal := range res.Contents {
   152  			l.objects = append(l.objects, s3Obj{obj: objVal})
   153  		}
   154  		if l.showDirs() { // add the pseudo Dirs
   155  			for _, cpVal := range res.CommonPrefixes {
   156  				// Follow the Linux convention that directories do not come back with a trailing /
   157  				// when read by ListDir.  To determine it is a directory, it is necessary to
   158  				// call implementation.Stat on the path and check IsDir()
   159  				pseudoDirName := *cpVal.Prefix
   160  				if strings.HasSuffix(pseudoDirName, pathSeparator) {
   161  					pseudoDirName = pseudoDirName[:len(pseudoDirName)-1]
   162  				}
   163  				l.objects = append(l.objects, s3Obj{cp: &pseudoDirName})
   164  			}
   165  		}
   166  
   167  		l.done = !aws.BoolValue(res.IsTruncated)
   168  	}
   169  }
   170  
   171  // Path implements Lister.Path
   172  func (l *s3Lister) Path() string {
   173  	return fmt.Sprintf("%s://%s/%s", l.scheme, l.bucket, l.object.name())
   174  }
   175  
   176  // Info implements Lister.Info
   177  func (l *s3Lister) Info() file.Info {
   178  	if obj := l.object.obj; obj != nil {
   179  		return &s3Info{
   180  			size:    *obj.Size,
   181  			modTime: *obj.LastModified,
   182  			etag:    *obj.ETag,
   183  		}
   184  	}
   185  	return nil
   186  }
   187  
// IsDir implements Lister.IsDir. It reports whether the current entry is a
// pseudo-directory, i.e. one whose common-prefix field (cp) is set.
func (l *s3Lister) IsDir() bool {
	return l.object.cp != nil
}
   192  
// Err returns the error recorded by the lister, or nil if none has been
// recorded.
func (l *s3Lister) Err() error {
	return l.err
}
   197  
// showDirs controls whether CommonPrefixes are returned during a scan.
// It is true exactly when the listing is not recursive.
func (l *s3Lister) showDirs() bool {
	return !l.recurse
}