github.com/grailbio/base@v0.0.11/file/s3file/list.go (about) 1 package s3file 2 3 import ( 4 "context" 5 "fmt" 6 "strings" 7 8 "github.com/aws/aws-sdk-go/aws" 9 "github.com/aws/aws-sdk-go/service/s3" 10 "github.com/grailbio/base/file" 11 "github.com/grailbio/base/log" 12 ) 13 14 // List implements file.Implementation interface. 15 func (impl *s3Impl) List(ctx context.Context, dir string, recurse bool) file.Lister { 16 scheme, bucket, key, err := ParseURL(dir) 17 if err != nil { 18 return &s3Lister{ctx: ctx, dir: dir, err: err} 19 } 20 if bucket == "" { 21 if recurse { 22 return &s3Lister{ctx: ctx, dir: dir, 23 err: fmt.Errorf("list %s: ListBuckets cannot be combined with recurse option", dir)} 24 } 25 clients, clientsErr := impl.clientsForAction(ctx, "ListAllMyBuckets", bucket, key) 26 if clientsErr != nil { 27 return &s3Lister{ctx: ctx, dir: dir, err: clientsErr} 28 } 29 return &s3BucketLister{ 30 ctx: ctx, 31 scheme: scheme, 32 clients: clients, 33 } 34 } 35 clients, err := impl.clientsForAction(ctx, "ListBucket", bucket, key) 36 if err != nil { 37 return &s3Lister{ctx: ctx, dir: dir, err: err} 38 } 39 return &s3Lister{ 40 ctx: ctx, 41 policy: newBackoffPolicy(clients, file.Opts{}), 42 dir: dir, 43 scheme: scheme, 44 bucket: bucket, 45 prefix: key, 46 recurse: recurse, 47 } 48 } 49 50 type s3Lister struct { 51 ctx context.Context 52 policy retryPolicy 53 dir, scheme, bucket, prefix string 54 55 object s3Obj 56 objects []s3Obj 57 token *string 58 err error 59 done bool 60 recurse bool 61 62 // consecutiveEmptyResponses counts how many times S3's ListObjectsV2WithContext returned 63 // 0 records (either contents or common prefixes) consecutively. 64 // Many empty responses would cause Scan to appear to hang, so we log a warning. 65 consecutiveEmptyResponses int 66 } 67 68 type s3Obj struct { 69 obj *s3.Object 70 cp *string 71 } 72 73 func (o s3Obj) name() string { 74 if o.obj == nil { 75 return *o.cp 76 } 77 return *o.obj.Key 78 } 79 80 // Scan implements Lister.Scan 81 func (l *s3Lister) Scan() bool { 82 for { 83 if l.err != nil { 84 return false 85 } 86 l.err = l.ctx.Err() 87 if l.err != nil { 88 return false 89 } 90 if len(l.objects) > 0 { 91 l.object, l.objects = l.objects[0], l.objects[1:] 92 ll := len(l.prefix) 93 // Ignore keys whose path component isn't exactly equal to l.prefix. For 94 // example, if l.prefix="foo/bar", then we yield "foo/bar" and 95 // "foo/bar/baz", but not "foo/barbaz". 96 keyVal := l.object.name() 97 if ll > 0 && len(keyVal) > ll { 98 if l.prefix[ll-1] == '/' { 99 // Treat prefix "foo/bar/" as "foo/bar". 100 ll-- 101 } 102 if keyVal[ll] != '/' { 103 continue 104 } 105 } 106 return true 107 } 108 if l.done { 109 return false 110 } 111 112 var prefix string 113 if l.showDirs() && !strings.HasSuffix(l.prefix, pathSeparator) && l.prefix != "" { 114 prefix = l.prefix + pathSeparator 115 } else { 116 prefix = l.prefix 117 } 118 119 req := &s3.ListObjectsV2Input{ 120 Bucket: aws.String(l.bucket), 121 ContinuationToken: l.token, 122 Prefix: aws.String(prefix), 123 } 124 125 if l.showDirs() { 126 req.Delimiter = aws.String(pathSeparator) 127 } 128 var ids s3RequestIDs 129 res, err := l.policy.client().ListObjectsV2WithContext(l.ctx, req, ids.captureOption()) 130 if l.policy.shouldRetry(l.ctx, err, l.dir) { 131 continue 132 } 133 if err != nil { 134 l.err = annotate(err, ids, &l.policy, fmt.Sprintf("s3file.list s3://%s/%s", l.bucket, l.prefix)) 135 return false 136 } 137 l.token = res.NextContinuationToken 138 nRecords := len(res.Contents) 139 if l.showDirs() { 140 nRecords += len(res.CommonPrefixes) 141 } 142 if nRecords > 0 { 143 l.consecutiveEmptyResponses = 0 144 } else { 145 l.consecutiveEmptyResponses++ 146 if n := l.consecutiveEmptyResponses; n > 7 && n&(n-1) == 0 { 147 log.Printf("s3file.list.scan: warning: S3 returned empty response %d consecutive times", n) 148 } 149 } 150 l.objects = make([]s3Obj, 0, nRecords) 151 for _, objVal := range res.Contents { 152 l.objects = append(l.objects, s3Obj{obj: objVal}) 153 } 154 if l.showDirs() { // add the pseudo Dirs 155 for _, cpVal := range res.CommonPrefixes { 156 // Follow the Linux convention that directories do not come back with a trailing / 157 // when read by ListDir. To determine it is a directory, it is necessary to 158 // call implementation.Stat on the path and check IsDir() 159 pseudoDirName := *cpVal.Prefix 160 if strings.HasSuffix(pseudoDirName, pathSeparator) { 161 pseudoDirName = pseudoDirName[:len(pseudoDirName)-1] 162 } 163 l.objects = append(l.objects, s3Obj{cp: &pseudoDirName}) 164 } 165 } 166 167 l.done = !aws.BoolValue(res.IsTruncated) 168 } 169 } 170 171 // Path implements Lister.Path 172 func (l *s3Lister) Path() string { 173 return fmt.Sprintf("%s://%s/%s", l.scheme, l.bucket, l.object.name()) 174 } 175 176 // Info implements Lister.Info 177 func (l *s3Lister) Info() file.Info { 178 if obj := l.object.obj; obj != nil { 179 return &s3Info{ 180 size: *obj.Size, 181 modTime: *obj.LastModified, 182 etag: *obj.ETag, 183 } 184 } 185 return nil 186 } 187 188 // IsDir implements Lister.IsDir 189 func (l *s3Lister) IsDir() bool { 190 return l.object.cp != nil 191 } 192 193 // Err returns an error, if any. 194 func (l *s3Lister) Err() error { 195 return l.err 196 } 197 198 // showDirs controls whether CommonPrefixes are returned during a scan 199 func (l *s3Lister) showDirs() bool { 200 return !l.recurse 201 }