github.com/grailbio/base@v0.0.11/file/implementation.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache-2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package file
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"sync"
    11  	"time"
    12  )
    13  
// Implementation implements operations for a file-system type.
// Thread safe.
type Implementation interface {
	// String returns a diagnostic string.
	String() string

	// Open opens a file for reading. The pathname given to file.Open() is
	// passed here unchanged. Thus, it contains the URL prefix such as "s3://".
	//
	// Open returns an error of kind errors.NotExist if there is
	// no file at the provided path.
	Open(ctx context.Context, path string, opts ...Opts) (File, error)

	// Create opens a file for writing. If "path" already exists, the old
	// contents will be destroyed. If "path" does not exist already, the file
	// will be newly created. The pathname given to file.Create() is passed here
	// unchanged. Thus, it contains the URL prefix such as "s3://".
	//
	// Creating a file with the same name as an existing directory is
	// unspecified behavior and varies by implementation. Users are thus advised
	// to avoid this if possible.
	//
	// For filesystem based storage engines (e.g. localfile), if the directory
	// part of the path does not exist already, it will be created. If the path
	// is a directory, an error will be returned.
	//
	// For key based storage engines (e.g. S3), it is OK to create a file that
	// already exists as a common prefix for other objects, assuming a pseudo
	// path separator. So both "foo" and "foo/bar" can be used as paths for
	// creating regular files in the same storage. See List() for more context.
	Create(ctx context.Context, path string, opts ...Opts) (File, error)

	// List finds files and directories. If "path" points to a regular file, the
	// lister will return information about the file itself and then finish.
	//
	// If "path" is a directory, the lister will list the files and directories
	// under the given path.  When "recursive" is set to false, List finds files
	// "one level" below dir.  Dir may end in /, but need not.  All the files
	// and directories returned by the lister will have pathnames of the form
	// dir/something.
	//
	// For key based storage engines (e.g. S3), a dir prefix not ending in "/"
	// must be followed immediately by "/" in some object keys, and only such
	// keys will be returned.
	//
	// With "recursive=true", List finds all files whose pathnames are under
	// "dir" or its subdirectories.  All the files returned by the lister will
	// have pathnames of the form dir/something.  Directories will not be
	// returned as separate entities.
	//
	// For example, List(ctx, "foo", true) will yield "foo/bar/bat.txt", but not
	// "foo.txt" or "foo/bar/", while List(ctx, "foo", false) will yield
	// "foo/bar" and "foo/bat.txt", but not "foo.txt" or "foo/bar/bat.txt".
	// There is no difference in the return value of List(ctx, "foo", ...) and
	// List(ctx, "foo/", ...).
	List(ctx context.Context, path string, recursive bool) Lister

	// Stat returns the file metadata.  It returns nil if path is
	// a directory. (There is no direct test for existence of a
	// directory.)
	//
	// Stat returns an error of kind errors.NotExist if there is
	// no file at the provided path.
	Stat(ctx context.Context, path string, opts ...Opts) (Info, error)

	// Remove removes the file. The path passed to file.Remove() is passed here
	// unchanged.
	Remove(ctx context.Context, path string) error

	// Presign returns a URL that can be used to perform the given HTTP method,
	// usually one of "GET", "PUT" or "DELETE", on the path for the duration
	// specified in expiry.
	//
	// It returns an error of kind errors.NotSupported for implementations that
	// do not support signed URLs, or that do not support the given HTTP method.
	//
	// Unlike Open and Stat, this method does not return an error of kind
	// errors.NotExist if there is no file at the provided path.
	Presign(ctx context.Context, path, method string, expiry time.Duration) (url string, err error)
}
    89  
// Lister lists files in a directory tree. Not thread safe.
type Lister interface {
	// Scan advances the lister to the next entry.  It returns false when the
	// scan stops, either because the end of the input has been reached or
	// because there was an error.  After Scan returns false, the Err method
	// returns any error that occurred during scanning.
	Scan() bool

	// Err returns the first error that occurred while scanning.
	Err() error

	// Path returns the last path that was scanned. The path always starts with
	// the directory path given to the List method.
	//
	// REQUIRES: Last call to Scan returned true.
	Path() string

	// IsDir returns true if Path() refers to a directory in a file system
	// or a common prefix ending in "/" in S3.
	//
	// REQUIRES: Last call to Scan returned true.
	IsDir() bool

	// Info returns metadata of the file that was scanned.
	//
	// REQUIRES: Last call to Scan returned true.
	Info() Info
}
   118  
// implementationFactory creates the Implementation for a scheme. Factories are
// stored by RegisterImplementation and invoked by FindImplementation the first
// time their scheme is looked up.
type implementationFactory func() Implementation

var (
	// mu guards implFactories and impls.
	mu                sync.RWMutex
	// implFactories maps a scheme (e.g. "s3") to the factory registered for it
	// via RegisterImplementation.
	implFactories     = make(map[string]implementationFactory)
	// impls caches, per scheme, the Implementation produced by the scheme's
	// factory; it is filled in by FindImplementation.
	impls             = make(map[string]Implementation)
	// localImplInstance is what FindImplementation returns for the empty
	// scheme. It is created at package initialization and read without
	// holding mu.
	localImplInstance = NewLocalImplementation()
)
   127  
   128  // RegisterImplementation arranges so that ParsePath(schema + "://anystring")
   129  // will return (impl, "anystring", nil) in the future. Schema is a string such
   130  // as "s3", "http".
   131  //
   132  // RegisterImplementation() should generally be called when the process starts.
   133  // implFactory will be invoked exactly once, upon the first request to this scheme;
   134  // this allows you to register with a factory that has not yet been full configured
   135  // (e.g., it requires parsing command line flags) as long as it will be configured
   136  // before the first request.
   137  //
   138  // REQUIRES: This function has not been called with the same schema before.
   139  func RegisterImplementation(scheme string, implFactory func() Implementation) {
   140  	if implFactory == nil {
   141  		panic("Emptyl impl")
   142  	}
   143  	mu.Lock()
   144  	defer mu.Unlock()
   145  	if scheme == "" {
   146  		panic("Empty scheme")
   147  	}
   148  	if _, ok := implFactories[scheme]; ok {
   149  		panic(fmt.Sprintf("register %s: file scheme already registered", scheme))
   150  	}
   151  	implFactories[scheme] = implFactory
   152  }
   153  
   154  // FindImplementation returns an Implementation object registered for the given
   155  // scheme.  It returns nil if the scheme is not registered.
   156  func FindImplementation(scheme string) Implementation {
   157  	if scheme == "" {
   158  		return localImplInstance
   159  	}
   160  	mu.RLock()
   161  
   162  	// First look for an existing implementation
   163  	if impl, ok := impls[scheme]; ok {
   164  		mu.RUnlock()
   165  		return impl
   166  	}
   167  
   168  	// Next, look for a factory to make an implementation
   169  	mu.RUnlock()
   170  	mu.Lock()
   171  	if implFactory, ok := implFactories[scheme]; ok {
   172  		// Double check first that no one else created the implementation
   173  		// while we upgraded to the write lock
   174  		var impl Implementation
   175  		if impl, ok = impls[scheme]; !ok {
   176  			impl = implFactory()
   177  			impls[scheme] = impl
   178  		}
   179  		mu.Unlock()
   180  		return impl
   181  	}
   182  
   183  	// If neither of the above, then there's no implementation
   184  	mu.Unlock()
   185  	return nil
   186  }
   187  
   188  func findImpl(path string) (Implementation, error) {
   189  	scheme, _, err := ParsePath(path)
   190  	if err != nil {
   191  		return nil, err
   192  	}
   193  	impl := FindImplementation(scheme)
   194  	if impl == nil {
   195  		return nil, fmt.Errorf("parsepath %s: no implementation registered for scheme %s", path, scheme)
   196  	}
   197  	return impl, nil
   198  }
   199  
   200  // Open opens the given file readonly.  It is a shortcut for calling
   201  // ParsePath(), then FindImplementation, then Implementation.Open.
   202  //
   203  // Open returns an error of kind errors.NotExist if the file at the
   204  // provided path does not exist.
   205  func Open(ctx context.Context, path string, opts ...Opts) (File, error) {
   206  	impl, err := findImpl(path)
   207  	if err != nil {
   208  		return nil, err
   209  	}
   210  	return impl.Open(ctx, path, opts...)
   211  }
   212  
   213  // Create opens the given file writeonly. It is a shortcut for calling
   214  // ParsePath(), then FindImplementation, then Implementation.Create.
   215  func Create(ctx context.Context, path string, opts ...Opts) (File, error) {
   216  	impl, err := findImpl(path)
   217  	if err != nil {
   218  		return nil, err
   219  	}
   220  	return impl.Create(ctx, path, opts...)
   221  }
   222  
   223  // Stat returns the give file's metadata. Is a shortcut for calling ParsePath(),
   224  // then FindImplementation, then Implementation.Stat.
   225  //
   226  // Stat returns an error of kind errors.NotExist if the file at the
   227  // provided path does not exist.
   228  func Stat(ctx context.Context, path string, opts ...Opts) (Info, error) {
   229  	impl, err := findImpl(path)
   230  	if err != nil {
   231  		return nil, err
   232  	}
   233  	return impl.Stat(ctx, path, opts...)
   234  }
   235  
   236  type errorLister struct{ err error }
   237  
   238  // Scan implements Lister.Scan.
   239  func (e *errorLister) Scan() bool { return false }
   240  
   241  // Path implements Lister.path.
   242  func (e *errorLister) Path() string { panic("errorLister.Path" + e.err.Error()) }
   243  
   244  // Info implements Lister.Info.
   245  func (e *errorLister) Info() Info { panic("errorLister.Info" + e.err.Error()) }
   246  
   247  // IsDir implements Lister.IsDir.
   248  func (e *errorLister) IsDir() bool { panic("errorLister.IsDir" + e.err.Error()) }
   249  
   250  // Err returns the Lister.Err.
   251  func (e *errorLister) Err() error { return e.err }
   252  
   253  // List finds all files whose pathnames under "dir" or its subdirectories.  All
   254  // the files returned by the lister will have pathnames of form dir/something.
   255  // For example List(ctx, "foo") will yield "foo/bar.txt", but not "foo.txt".
   256  //
   257  // Example: impl.List(ctx, "s3://grail-data/foo")
   258  func List(ctx context.Context, prefix string, recursive bool) Lister {
   259  	impl, err := findImpl(prefix)
   260  	if err != nil {
   261  		return &errorLister{err: err}
   262  	}
   263  	return impl.List(ctx, prefix, recursive)
   264  }
   265  
   266  // Remove is a shortcut for calling ParsePath(), then calling
   267  // Implementation.Remove method.
   268  func Remove(ctx context.Context, path string) error {
   269  	impl, err := findImpl(path)
   270  	if err != nil {
   271  		return err
   272  	}
   273  	return impl.Remove(ctx, path)
   274  }
   275  
   276  // Presign is a shortcut for calling ParsePath(), then calling
   277  // Implementation.Presign method.
   278  func Presign(ctx context.Context, path, method string, expiry time.Duration) (string, error) {
   279  	impl, err := findImpl(path)
   280  	if err != nil {
   281  		return "", err
   282  	}
   283  	return impl.Presign(ctx, path, method, expiry)
   284  }
   285  
// Opts controls the file access requests, such as Open and Stat.
type Opts struct {
	// When set, this flag causes the file package to keep retrying when the
	// file is reported as not found. This flag should be set when:
	//
	// 1. you are accessing a file on S3, and
	//
	// 2. an application may have attempted to GET the same file in the recent
	// past (~5 minutes). The said application may be on a different machine.
	//
	// This flag is honored only by S3, to work around the problem where S3 may
	// report a spurious KeyNotFound error after a GET request to the same file.
	// For more details, see
	// https://docs.aws.amazon.com/AmazonS3/latest/dev/Introduction.html#CoreConcepts,
	// section "S3 Data Consistency Model". In particular:
	//
	//   The caveat is that if you make a HEAD or GET request to the key
	//   name (to find if the object exists) before creating the object, Amazon S3
	//   provides eventual consistency for read-after-write.
	RetryWhenNotFound bool

	// When set, Close will ignore a NoSuchUpload error from S3
	// CompleteMultiPartUpload and silently return OK.
	//
	// This is to work around a bug where concurrent uploads to one file
	// sometimes cause an upload request to be lost on the server side.
	// https://console.aws.amazon.com/support/cases?region=us-west-2#/6299905521/en
	// https://github.com/yasushi-saito/s3uploaderror
	//
	// Set this flag only if:
	//
	//  1. you are writing to a file on S3, and
	//
	//  2. possible concurrent writes to the same file produce the same
	//  contents, so you are ok with taking any of them.
	//
	// If you don't set this flag, then concurrent writes to the same file may
	// fail with a NoSuchUpload error, and it is up to you to retry.
	//
	// On non-S3 file systems, this flag is ignored.
	IgnoreNoSuchUpload bool
}