github.com/grailbio/base@v0.0.11/file/doc.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache-2.0 3 // license that can be found in the LICENSE file. 4 5 // Package file provides basic file operations across multiple file-system 6 // types. It is designed for use in applications that operate uniformly on 7 // multiple storage types, such as local files, S3 and HTTP. 8 // 9 // Overview 10 // 11 // This package is designed with the following goals: 12 // 13 // - Support popular file systems, especially S3 and the local file system. 14 // 15 // - Define operation semantics that are implementable on all the supported file 16 // systems, yet practical and usable. 17 // 18 // - Extensible. Provide leeway to do things like registering new file system 19 // types or ticket-based authorizations. 20 // 21 // This package defines two key interfaces, Implementation and File. 22 // 23 // - Implementation provides filesystem operations, such as Open, Remove, and List 24 // (directory walking). 25 // 26 // - File implements operations on a file. It is created by 27 // Implementation.{Open,Create} calls. File is similar to Go's os.File object 28 // but provides limited functionality. 29 // 30 // Reading and writing files 31 // 32 // The following snippet shows registering an S3 implementation, then writing 33 // and reading an S3 file. 34 // 35 // import ( 36 // "context" 37 // "io/ioutil" 38 // 39 // "github.com/grailbio/base/file" 40 // "github.com/grailbio/base/file/s3file" // file.Implementation implementation for S3 41 // "github.com/aws/aws-sdk-go/aws/session" 42 // "github.com/stretchr/testify/require" 43 // ) 44 // 45 // func init() { 46 // file.RegisterImplementation("s3", s3file.NewImplementation( 47 // s3file.NewDefaultProvider())) 48 // } 49 // 50 // // Caution: this code ignores all errors. 
51 // func WriteTest() { 52 // ctx := context.Background() 53 // f, err := file.Create(ctx, "s3://grail-saito/tmp/test.txt") 54 // n, err := f.Writer(ctx).Write([]byte("Hello")) 55 // err = f.Close(ctx) 56 // } 57 // 58 // func ReadTest() { 59 // ctx := context.Background() 60 // f, err := file.Open(ctx, "s3://grail-saito/tmp/test.txt") 61 // data, err := ioutil.ReadAll(f.Reader(ctx)) 62 // err = f.Close(ctx) 63 // } 64 // 65 // To open a file for reading or writing, run file.Open("s3://bucket/key") or 66 // file.Create("s3://bucket/key"). A File object does not implement io.Reader 67 // or io.Writer directly. Instead, you must call File.Reader or File.Writer to 68 // start reading or writing. These methods are split from the File itself so 69 // that an application can pass different contexts to different I/O operations. 70 // 71 // File-system operations 72 // 73 // The file package provides functions similar to those in the standard os 74 // package. For example, file.Remove("s3://bucket/key") removes a file, and 75 // file.Stat("s3://bucket/key") provides metadata about the file. 76 // 77 // Pathname utility functions 78 // 79 // The file package also provides functions that are similar to those in the 80 // standard filepath package. Functions file.Base, file.Dir, file.Join work just 81 // like filepath.{Base,Dir,Join}, except that they handle the URL pathnames 82 // properly. For example, file.Join("s3://foo", "bar") will return 83 // "s3://foo/bar", whereas filepath.Join("s3://foo", "bar") would return 84 // "s3:/foo/bar". 85 // 86 // Registering a filesystem implementation 87 // 88 // Function RegisterImplementation associates an implementation with a scheme 89 // ("s3", "http", "git", etc). A local file system implementation is 90 // automatically available without any explicit 91 // registration. RegisterImplementation is usually invoked when a process starts 92 // up, for all the supported file system types. 
For example: 93 // 94 // import ( 95 // "io/ioutil" 96 // "github.com/grailbio/base/context" 97 // "github.com/grailbio/base/file" 98 // "github.com/grailbio/base/file/s3file" // file.Implementation implementation for S3 99 // ) 100 // func init() { 101 // file.RegisterImplementation("s3", s3file.NewImplementation(...)) 102 // } 103 // func main() { 104 // ctx := context.Background() 105 // f, err := file.Open(ctx, "s3://somebucket/foo.txt") 106 // data, err := ioutil.ReadAll(f.Reader(ctx)) 107 // err = f.Close(ctx) 108 // ... 109 // } 110 // 111 // Once an implementation is registered, the files for that scheme can be opened 112 // or created using a "scheme:name" pathname. 113 // 114 // Differences from the os package 115 // 116 // The file package is similar to Go's standard os package. The differences are 117 // the following. 118 // 119 // - The file package focuses on providing a file-like API for object storage 120 // systems, such as S3 or GCS. 121 // 122 // - Mutations to a File are restricted to whole-file writes. There is no option 123 // to overwrite a part of an existing file. 124 // 125 // - All the operations take a context parameter. 126 // 127 // - file.File implements neither io.Reader nor io.Writer directly. One must 128 // call File.Reader or File.Writer methods to obtain a reader or writer object. 129 // 130 // - Directories are simulated in a best-effort manner on implementations that do 131 // not support directories as first-class entities, such as S3. Lister provides 132 // IsDir() for the current path. Info(path) returns nil for directories. 133 // 134 // Concurrency 135 // 136 // The Implementation and File provide open-close consistency. More 137 // specifically, this package linearizes fileops, with a fileop defined in the 138 // following way: fileop is a set of operations, starting from 139 // Implementation.{Open,Create}, followed by read/write/stat operations on the 140 // file, followed by File.Close. 
Operations such as 141 // Implementation.{Stat,Remove,List} and Lister.Scan form a singleton fileop. 142 // 143 // Caution: a local file system on NFS (w/o cache leasing) doesn't provide this 144 // guarantee. Use NFS at your own risk. 145 package file