github.com/pachyderm/pachyderm@v1.13.4/src/server/pfs/s3/driver.go (about) 1 package s3 2 3 import ( 4 "fmt" 5 "net/http" 6 "strings" 7 "time" 8 9 "github.com/pachyderm/pachyderm/src/client" 10 "github.com/pachyderm/pachyderm/src/client/pkg/errors" 11 "github.com/pachyderm/pachyderm/src/server/pkg/uuid" 12 13 "github.com/gogo/protobuf/types" 14 "github.com/pachyderm/s2" 15 ) 16 17 // Bucket represents an S3 bucket 18 type Bucket struct { 19 // Repo is the PFS repo that this bucket points to 20 Repo string 21 // Commit is the PFS commit that this repo points to 22 Commit string 23 // Name is the name of the bucket 24 Name string 25 } 26 27 type bucketCapabilities struct { 28 readable bool 29 writable bool 30 historicVersions bool 31 } 32 33 // Driver implementations drive the underlying bucket-related functionality 34 // for an s3gateway instance 35 type Driver interface { 36 listBuckets(pc *client.APIClient, r *http.Request, buckets *[]*s2.Bucket) error 37 bucket(pc *client.APIClient, r *http.Request, name string) (*Bucket, error) 38 bucketCapabilities(pc *client.APIClient, r *http.Request, bucket *Bucket) (bucketCapabilities, error) 39 canModifyBuckets() bool 40 } 41 42 // MasterDriver is the driver for the s3gateway instance running on pachd 43 // master 44 type MasterDriver struct{} 45 46 // NewMasterDriver constructs a new master driver 47 func NewMasterDriver() *MasterDriver { 48 return &MasterDriver{} 49 } 50 51 func (d *MasterDriver) listBuckets(pc *client.APIClient, r *http.Request, buckets *[]*s2.Bucket) error { 52 repos, err := pc.ListRepo() 53 if err != nil { 54 return err 55 } 56 57 for _, repo := range repos { 58 t, err := types.TimestampFromProto(repo.Created) 59 if err != nil { 60 return err 61 } 62 for _, branch := range repo.Branches { 63 *buckets = append(*buckets, &s2.Bucket{ 64 Name: fmt.Sprintf("%s.%s", branch.Name, branch.Repo.Name), 65 CreationDate: t, 66 }) 67 } 68 } 69 70 return nil 71 } 72 73 func (d *MasterDriver) bucket(pc *client.APIClient, r *http.Request, name string) (*Bucket, error) { 74 var repo, commit string 75 commit = "master" 76 // Bucketn name syntax: [commitID.][branch.]repoName 77 parts := strings.SplitN(name, ".", 3) 78 if len(parts) == 3 { 79 // Support commit.branch.repo syntax, so that we are interoperable with v2 syntax 80 // but we don't need the branch info, since repo + commit is unique 81 commit, repo = parts[0], parts[2] 82 } else if len(parts) == 2 { 83 // commit is overloaded, can be either a branch name or a commit_id 84 commit, repo = parts[0], parts[1] 85 } else { 86 repo = parts[0] 87 } 88 89 return &Bucket{ 90 Repo: repo, 91 Commit: commit, 92 Name: name, 93 }, nil 94 } 95 96 func (d *MasterDriver) bucketCapabilities(pc *client.APIClient, r *http.Request, bucket *Bucket) (bucketCapabilities, error) { 97 readable := true 98 // A bucket is readable if the corresponding branch points to a commit 99 if !uuid.IsUUIDWithoutDashes(bucket.Commit) { 100 branchInfo, err := pc.InspectBranch(bucket.Repo, bucket.Commit) 101 if err != nil { 102 return bucketCapabilities{}, maybeNotFoundError(r, err) 103 } 104 readable = branchInfo.Head != nil 105 } 106 107 return bucketCapabilities{ 108 readable: readable, 109 writable: true, 110 historicVersions: true, 111 }, nil 112 } 113 114 func (d *MasterDriver) canModifyBuckets() bool { 115 return true 116 } 117 118 // WorkerDriver is the driver for the s3gateway instance running on pachd 119 // workers 120 type WorkerDriver struct { 121 inputBuckets []*Bucket 122 outputBucket *Bucket 123 namesMap map[string]*Bucket 124 } 125 126 // NewWorkerDriver creates a new worker driver. `inputBuckets` is a list of 127 // whitelisted buckets to be served from input repos. `outputBucket` is the 128 // whitelisted bucket to be served from an output repo. If `nil`, no output 129 // bucket will be available. 130 func NewWorkerDriver(inputBuckets []*Bucket, outputBucket *Bucket) *WorkerDriver { 131 namesMap := map[string]*Bucket{} 132 133 for _, ib := range inputBuckets { 134 namesMap[ib.Name] = ib 135 } 136 137 if outputBucket != nil { 138 namesMap[outputBucket.Name] = outputBucket 139 } 140 141 return &WorkerDriver{ 142 inputBuckets: inputBuckets, 143 outputBucket: outputBucket, 144 namesMap: namesMap, 145 } 146 } 147 148 func (d *WorkerDriver) listBuckets(pc *client.APIClient, r *http.Request, buckets *[]*s2.Bucket) error { 149 repos, err := pc.ListRepo() 150 if err != nil { 151 return err 152 } 153 timestamps := map[string]time.Time{} 154 for _, repo := range repos { 155 timestamp, err := types.TimestampFromProto(repo.Created) 156 if err != nil { 157 return err 158 } 159 timestamps[repo.Repo.Name] = timestamp 160 } 161 162 for _, bucket := range d.namesMap { 163 timestamp, ok := timestamps[bucket.Repo] 164 if !ok { 165 return errors.Errorf("worker s3gateway configuration includes repo %q, which does not exist", bucket.Repo) 166 } 167 *buckets = append(*buckets, &s2.Bucket{ 168 Name: bucket.Name, 169 CreationDate: timestamp, 170 }) 171 } 172 173 return nil 174 } 175 176 func (d *WorkerDriver) bucket(pc *client.APIClient, r *http.Request, name string) (*Bucket, error) { 177 bucket := d.namesMap[name] 178 if bucket == nil { 179 return &Bucket{ 180 Name: name, 181 }, nil 182 } 183 return bucket, nil 184 } 185 186 func (d *WorkerDriver) bucketCapabilities(pc *client.APIClient, r *http.Request, bucket *Bucket) (bucketCapabilities, error) { 187 if bucket.Repo == "" || bucket.Commit == "" { 188 return bucketCapabilities{}, s2.NoSuchBucketError(r) 189 } else if bucket == d.outputBucket { 190 return bucketCapabilities{ 191 readable: false, 192 writable: true, 193 historicVersions: false, 194 }, nil 195 } 196 return bucketCapabilities{ 197 readable: true, 198 writable: false, 199 historicVersions: false, 200 }, nil 201 } 202 203 func (d *WorkerDriver) canModifyBuckets() bool { 204 return false 205 }