// Source: github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/file/implementation.go

// Copyright 2018 GRAIL, Inc. All rights reserved.
// Use of this source code is governed by the Apache-2.0
// license that can be found in the LICENSE file.

package file

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// Implementation implements operations for a file-system type.
// Thread safe.
type Implementation interface {
	// String returns a diagnostic string.
	String() string

	// Open opens a file for reading. The pathname given to file.Open() is passed
	// here unchanged. Thus, it contains the URL prefix such as "s3://".
	//
	// Open returns an error of kind errors.NotExist if there is
	// no file at the provided path.
	Open(ctx context.Context, path string, opts ...Opts) (File, error)

	// Create opens a file for writing. If "path" already exists, the old contents
	// will be destroyed. If "path" does not exist already, the file will be newly
	// created. The pathname given to file.Create() is passed here unchanged.
	// Thus, it contains the URL prefix such as "s3://".
	//
	// Creating a file with the same name as an existing directory is unspecified
	// behavior and varies by implementation. Users are thus advised to avoid
	// this if possible.
	//
	// For filesystem based storage engines (e.g. localfile), if the directory
	// part of the path does not exist already, it will be created. If the path
	// is a directory, an error will be returned.
	//
	// For key based storage engines (e.g. S3), it is OK to create a file that
	// already exists as a common prefix for other objects, assuming a pseudo
	// path separator. So both "foo" and "foo/bar" can be used as paths for
	// creating regular files in the same storage. See List() for more context.
	Create(ctx context.Context, path string, opts ...Opts) (File, error)

	// List finds files and directories. If "path" points to a regular file, the
	// lister will return information about the file itself and then finish.
	//
	// If "path" is a directory, the lister will list the files and directories
	// under the given path. When "recursive" is set to false, List finds files
	// "one level" below dir. Dir may end in /, but need not. All the files and
	// directories returned by the lister will have pathnames of the form
	// dir/something.
	//
	// For key based storage engines (e.g. S3), a dir prefix not ending in "/" must
	// be followed immediately by "/" in some object keys, and only such keys
	// will be returned.
	//
	// With "recursive=true" List finds all files whose pathnames are under "dir"
	// or its subdirectories. All the files returned by the lister will have
	// pathnames of the form dir/something. Directories will not be returned as
	// separate entities.
	//
	// For example List(ctx, "foo", true) will yield "foo/bar/bat.txt", but not
	// "foo.txt" or "foo/bar/", while List(ctx, "foo", false) will yield "foo/bar"
	// and "foo/bat.txt", but not "foo.txt" or "foo/bar/bat.txt". There is no
	// difference in the return value of List(ctx, "foo", ...) and
	// List(ctx, "foo/", ...).
	List(ctx context.Context, path string, recursive bool) Lister

	// Stat returns the file metadata. It returns nil if path is
	// a directory. (There is no direct test for existence of a
	// directory.)
	//
	// Stat returns an error of kind errors.NotExist if there is
	// no file at the provided path.
	Stat(ctx context.Context, path string, opts ...Opts) (Info, error)

	// Remove removes the file. The path passed to file.Remove() is passed here
	// unchanged.
	Remove(ctx context.Context, path string) error

	// Presign returns a URL that can be used to perform the given HTTP method,
	// usually one of "GET", "PUT" or "DELETE", on the path for the duration
	// specified in expiry.
	//
	// It returns an error of kind errors.NotSupported for implementations that
	// do not support signed URLs, or that do not support the given HTTP method.
	//
	// Unlike Open and Stat, this method does not return an error of kind
	// errors.NotExist if there is no file at the provided path.
	Presign(ctx context.Context, path, method string, expiry time.Duration) (url string, err error)
}

// Lister lists files in a directory tree. Not thread safe.
type Lister interface {
	// Scan advances the lister to the next entry. It returns false either when
	// the scan stops because we have reached the end of the input or else
	// because there was an error. After Scan returns, the Err method returns
	// any error that occurred during scanning.
	Scan() bool

	// Err returns the first error that occurred while scanning.
	Err() error

	// Path returns the last path that was scanned. The path always starts with
	// the directory path given to the List method.
	//
	// REQUIRES: Last call to Scan returned true.
	Path() string

	// IsDir returns true if Path() refers to a directory in a file system
	// or a common prefix ending in "/" in S3.
	//
	// REQUIRES: Last call to Scan returned true.
	IsDir() bool

	// Info returns metadata of the file that was scanned.
	//
	// REQUIRES: Last call to Scan returned true.
	Info() Info
}

// implementationFactory constructs an Implementation. It is the value type
// stored in implFactories.
type implementationFactory func() Implementation

var (
	// mu guards implFactories and impls.
	mu sync.RWMutex
	// implFactories maps a scheme to the factory passed to
	// RegisterImplementation for that scheme.
	implFactories = make(map[string]implementationFactory)
	// impls maps a scheme to the Implementation its factory produced; entries
	// are added by FindImplementation.
	impls = make(map[string]Implementation)
	// localImplInstance is what FindImplementation returns for the empty scheme.
	localImplInstance = NewLocalImplementation()
)

// RegisterImplementation arranges for paths of the form
// scheme + "://anystring" to be served by the Implementation that implFactory
// creates. Scheme is a string such as "s3" or "http".
//
// RegisterImplementation() should generally be called when the process starts.
133 // implFactory will be invoked exactly once, upon the first request to this scheme; 134 // this allows you to register with a factory that has not yet been full configured 135 // (e.g., it requires parsing command line flags) as long as it will be configured 136 // before the first request. 137 // 138 // REQUIRES: This function has not been called with the same schema before. 139 func RegisterImplementation(scheme string, implFactory func() Implementation) { 140 if implFactory == nil { 141 panic("Emptyl impl") 142 } 143 mu.Lock() 144 defer mu.Unlock() 145 if scheme == "" { 146 panic("Empty scheme") 147 } 148 if _, ok := implFactories[scheme]; ok { 149 panic(fmt.Sprintf("register %s: file scheme already registered", scheme)) 150 } 151 implFactories[scheme] = implFactory 152 } 153 154 // FindImplementation returns an Implementation object registered for the given 155 // scheme. It returns nil if the scheme is not registered. 156 func FindImplementation(scheme string) Implementation { 157 if scheme == "" { 158 return localImplInstance 159 } 160 mu.RLock() 161 162 // First look for an existing implementation 163 if impl, ok := impls[scheme]; ok { 164 mu.RUnlock() 165 return impl 166 } 167 168 // Next, look for a factory to make an implementation 169 mu.RUnlock() 170 mu.Lock() 171 if implFactory, ok := implFactories[scheme]; ok { 172 // Double check first that no one else created the implementation 173 // while we upgraded to the write lock 174 var impl Implementation 175 if impl, ok = impls[scheme]; !ok { 176 impl = implFactory() 177 impls[scheme] = impl 178 } 179 mu.Unlock() 180 return impl 181 } 182 183 // If neither of the above, then there's no implementation 184 mu.Unlock() 185 return nil 186 } 187 188 func findImpl(path string) (Implementation, error) { 189 scheme, _, err := ParsePath(path) 190 if err != nil { 191 return nil, err 192 } 193 impl := FindImplementation(scheme) 194 if impl == nil { 195 return nil, fmt.Errorf("parsepath %s: no implementation 
registered for scheme %s", path, scheme) 196 } 197 return impl, nil 198 } 199 200 // Open opens the given file readonly. It is a shortcut for calling 201 // ParsePath(), then FindImplementation, then Implementation.Open. 202 // 203 // Open returns an error of kind errors.NotExist if the file at the 204 // provided path does not exist. 205 func Open(ctx context.Context, path string, opts ...Opts) (File, error) { 206 impl, err := findImpl(path) 207 if err != nil { 208 return nil, err 209 } 210 return impl.Open(ctx, path, opts...) 211 } 212 213 // Create opens the given file writeonly. It is a shortcut for calling 214 // ParsePath(), then FindImplementation, then Implementation.Create. 215 func Create(ctx context.Context, path string, opts ...Opts) (File, error) { 216 impl, err := findImpl(path) 217 if err != nil { 218 return nil, err 219 } 220 return impl.Create(ctx, path, opts...) 221 } 222 223 // Stat returns the give file's metadata. Is a shortcut for calling ParsePath(), 224 // then FindImplementation, then Implementation.Stat. 225 // 226 // Stat returns an error of kind errors.NotExist if the file at the 227 // provided path does not exist. 228 func Stat(ctx context.Context, path string, opts ...Opts) (Info, error) { 229 impl, err := findImpl(path) 230 if err != nil { 231 return nil, err 232 } 233 return impl.Stat(ctx, path, opts...) 234 } 235 236 type errorLister struct{ err error } 237 238 // Scan implements Lister.Scan. 239 func (e *errorLister) Scan() bool { return false } 240 241 // Path implements Lister.path. 242 func (e *errorLister) Path() string { panic("errorLister.Path" + e.err.Error()) } 243 244 // Info implements Lister.Info. 245 func (e *errorLister) Info() Info { panic("errorLister.Info" + e.err.Error()) } 246 247 // IsDir implements Lister.IsDir. 248 func (e *errorLister) IsDir() bool { panic("errorLister.IsDir" + e.err.Error()) } 249 250 // Err returns the Lister.Err. 
func (e *errorLister) Err() error { return e.err }

// List finds files and directories under the given prefix. It is a shortcut
// for calling ParsePath(), then FindImplementation, then Implementation.List;
// see Implementation.List for the exact meaning of "recursive".
//
// All the paths returned by the lister will have the form prefix/something.
// For example List(ctx, "foo", true) will yield "foo/bar.txt", but not
// "foo.txt".
//
// If no implementation can be found for prefix, the returned Lister yields no
// entries (Scan returns false) and reports the failure through Err.
//
// Example: file.List(ctx, "s3://grail-data/foo", true)
func List(ctx context.Context, prefix string, recursive bool) Lister {
	impl, err := findImpl(prefix)
	if err != nil {
		return &errorLister{err: err}
	}
	return impl.List(ctx, prefix, recursive)
}

// Remove removes the file at path. It is a shortcut for calling ParsePath(),
// then FindImplementation, then Implementation.Remove.
func Remove(ctx context.Context, path string) error {
	impl, err := findImpl(path)
	if err != nil {
		return err
	}
	return impl.Remove(ctx, path)
}

// Presign returns a URL for performing the given HTTP method on path, valid
// for the given expiry. It is a shortcut for calling ParsePath(), then
// FindImplementation, then Implementation.Presign.
func Presign(ctx context.Context, path, method string, expiry time.Duration) (string, error) {
	impl, err := findImpl(path)
	if err != nil {
		return "", err
	}
	return impl.Presign(ctx, path, method, expiry)
}

// Opts controls the file access requests, such as Open and Stat.
type Opts struct {
	// When set, this flag causes the file package to keep retrying when the file
	// is reported as not found. This flag should be set when:
	//
	// 1. you are accessing a file on S3, and
	//
	// 2. an application may have attempted to GET the same file in the recent
	// past (~5 minutes). The said application may be on a different machine.
	//
	// This flag is honored only by S3 to work around the problem where S3 may
	// report a spurious KeyNotFound error after a GET request to the same file.
	// For more details, see
	// https://docs.aws.amazon.com/AmazonS3/latest/dev/Introduction.html#CoreConcepts,
	// section "S3 Data Consistency Model". In particular:
	//
	//   The caveat is that if you make a HEAD or GET request to the key
	//   name (to find if the object exists) before creating the object, Amazon S3
	//   provides eventual consistency for read-after-write.
	RetryWhenNotFound bool

	// When set, Close will ignore a NoSuchUpload error from S3
	// CompleteMultiPartUpload and silently return OK.
	//
	// This is to work around a bug where concurrent uploads to one file sometimes
	// cause an upload request to be lost on the server side.
	// https://console.aws.amazon.com/support/cases?region=us-west-2#/6299905521/en
	// https://github.com/yasushi-saito/s3uploaderror
	//
	// Set this flag only if:
	//
	// 1. you are writing to a file on S3, and
	//
	// 2. possible concurrent writes to the same file produce the same
	// contents, so you are ok with taking any of them.
	//
	// If you don't set this flag, then concurrent writes to the same file may
	// fail with a NoSuchUpload error, and it is up to you to retry.
	//
	// On non-S3 file systems, this flag is ignored.
	IgnoreNoSuchUpload bool
}