github.com/pachyderm/pachyderm@v1.13.4/src/server/pfs/s3/driver.go (about)

     1  package s3
     2  
import (
	"fmt"
	"net/http"
	"sort"
	"strings"
	"time"

	"github.com/pachyderm/pachyderm/src/client"
	"github.com/pachyderm/pachyderm/src/client/pkg/errors"
	"github.com/pachyderm/pachyderm/src/server/pkg/uuid"

	"github.com/gogo/protobuf/types"
	"github.com/pachyderm/s2"
)
    16  
// Bucket represents an S3 bucket and the PFS location that backs it.
type Bucket struct {
	// Repo is the PFS repo that this bucket points to
	Repo string
	// Commit is the PFS commit that this bucket points to. The master driver
	// may store a branch name here instead of a commit ID.
	Commit string
	// Name is the name of the bucket
	Name string
}
    26  
// bucketCapabilities is the set of permission flags a Driver reports for a
// single bucket.
type bucketCapabilities struct {
	// readable is set when the bucket's contents may be read.
	readable bool
	// writable is set when the bucket accepts writes.
	writable bool
	// historicVersions is set by the master driver and cleared by the worker
	// driver.
	// NOTE(review): presumably gates access to older object versions; the
	// consumer of this flag is outside this file - confirm.
	historicVersions bool
}
    32  
// Driver implementations drive the underlying bucket-related functionality
// for an s3gateway instance
type Driver interface {
	// listBuckets appends the buckets served by this driver to *buckets.
	listBuckets(pc *client.APIClient, r *http.Request, buckets *[]*s2.Bucket) error
	// bucket resolves an S3 bucket name to the Bucket describing its repo and
	// commit.
	bucket(pc *client.APIClient, r *http.Request, name string) (*Bucket, error)
	// bucketCapabilities reports what may be done with the given bucket.
	bucketCapabilities(pc *client.APIClient, r *http.Request, bucket *Bucket) (bucketCapabilities, error)
	// canModifyBuckets is true for the master driver and false for the worker
	// driver.
	// NOTE(review): presumably controls bucket creation/deletion; the caller
	// is outside this file - confirm.
	canModifyBuckets() bool
}
    41  
    42  // MasterDriver is the driver for the s3gateway instance running on pachd
    43  // master
    44  type MasterDriver struct{}
    45  
    46  // NewMasterDriver constructs a new master driver
    47  func NewMasterDriver() *MasterDriver {
    48  	return &MasterDriver{}
    49  }
    50  
    51  func (d *MasterDriver) listBuckets(pc *client.APIClient, r *http.Request, buckets *[]*s2.Bucket) error {
    52  	repos, err := pc.ListRepo()
    53  	if err != nil {
    54  		return err
    55  	}
    56  
    57  	for _, repo := range repos {
    58  		t, err := types.TimestampFromProto(repo.Created)
    59  		if err != nil {
    60  			return err
    61  		}
    62  		for _, branch := range repo.Branches {
    63  			*buckets = append(*buckets, &s2.Bucket{
    64  				Name:         fmt.Sprintf("%s.%s", branch.Name, branch.Repo.Name),
    65  				CreationDate: t,
    66  			})
    67  		}
    68  	}
    69  
    70  	return nil
    71  }
    72  
    73  func (d *MasterDriver) bucket(pc *client.APIClient, r *http.Request, name string) (*Bucket, error) {
    74  	var repo, commit string
    75  	commit = "master"
    76  	// Bucketn name syntax: [commitID.][branch.]repoName
    77  	parts := strings.SplitN(name, ".", 3)
    78  	if len(parts) == 3 {
    79  		// Support commit.branch.repo syntax, so that we are interoperable with v2 syntax
    80  		// but we don't need the branch info, since repo + commit is unique
    81  		commit, repo = parts[0], parts[2]
    82  	} else if len(parts) == 2 {
    83  		// commit is overloaded, can be either a branch name or a commit_id
    84  		commit, repo = parts[0], parts[1]
    85  	} else {
    86  		repo = parts[0]
    87  	}
    88  
    89  	return &Bucket{
    90  		Repo:   repo,
    91  		Commit: commit,
    92  		Name:   name,
    93  	}, nil
    94  }
    95  
    96  func (d *MasterDriver) bucketCapabilities(pc *client.APIClient, r *http.Request, bucket *Bucket) (bucketCapabilities, error) {
    97  	readable := true
    98  	// A bucket is readable if the corresponding branch points to a commit
    99  	if !uuid.IsUUIDWithoutDashes(bucket.Commit) {
   100  		branchInfo, err := pc.InspectBranch(bucket.Repo, bucket.Commit)
   101  		if err != nil {
   102  			return bucketCapabilities{}, maybeNotFoundError(r, err)
   103  		}
   104  		readable = branchInfo.Head != nil
   105  	}
   106  
   107  	return bucketCapabilities{
   108  		readable:         readable,
   109  		writable:         true,
   110  		historicVersions: true,
   111  	}, nil
   112  }
   113  
// canModifyBuckets always returns true for the master driver.
func (d *MasterDriver) canModifyBuckets() bool {
	return true
}
   117  
// WorkerDriver is the driver for the s3gateway instance running on pachd
// workers
type WorkerDriver struct {
	// inputBuckets are the buckets served from input repos.
	inputBuckets []*Bucket
	// outputBucket is the bucket served from the output repo; nil when no
	// output bucket is available.
	outputBucket *Bucket
	// namesMap indexes every input bucket and the output bucket (if any) by
	// bucket name. It is populated by NewWorkerDriver.
	namesMap map[string]*Bucket
}
   125  
   126  // NewWorkerDriver creates a new worker driver. `inputBuckets` is a list of
   127  // whitelisted buckets to be served from input repos. `outputBucket` is the
   128  // whitelisted bucket to be served from an output repo. If `nil`, no output
   129  // bucket will be available.
   130  func NewWorkerDriver(inputBuckets []*Bucket, outputBucket *Bucket) *WorkerDriver {
   131  	namesMap := map[string]*Bucket{}
   132  
   133  	for _, ib := range inputBuckets {
   134  		namesMap[ib.Name] = ib
   135  	}
   136  
   137  	if outputBucket != nil {
   138  		namesMap[outputBucket.Name] = outputBucket
   139  	}
   140  
   141  	return &WorkerDriver{
   142  		inputBuckets: inputBuckets,
   143  		outputBucket: outputBucket,
   144  		namesMap:     namesMap,
   145  	}
   146  }
   147  
   148  func (d *WorkerDriver) listBuckets(pc *client.APIClient, r *http.Request, buckets *[]*s2.Bucket) error {
   149  	repos, err := pc.ListRepo()
   150  	if err != nil {
   151  		return err
   152  	}
   153  	timestamps := map[string]time.Time{}
   154  	for _, repo := range repos {
   155  		timestamp, err := types.TimestampFromProto(repo.Created)
   156  		if err != nil {
   157  			return err
   158  		}
   159  		timestamps[repo.Repo.Name] = timestamp
   160  	}
   161  
   162  	for _, bucket := range d.namesMap {
   163  		timestamp, ok := timestamps[bucket.Repo]
   164  		if !ok {
   165  			return errors.Errorf("worker s3gateway configuration includes repo %q, which does not exist", bucket.Repo)
   166  		}
   167  		*buckets = append(*buckets, &s2.Bucket{
   168  			Name:         bucket.Name,
   169  			CreationDate: timestamp,
   170  		})
   171  	}
   172  
   173  	return nil
   174  }
   175  
   176  func (d *WorkerDriver) bucket(pc *client.APIClient, r *http.Request, name string) (*Bucket, error) {
   177  	bucket := d.namesMap[name]
   178  	if bucket == nil {
   179  		return &Bucket{
   180  			Name: name,
   181  		}, nil
   182  	}
   183  	return bucket, nil
   184  }
   185  
   186  func (d *WorkerDriver) bucketCapabilities(pc *client.APIClient, r *http.Request, bucket *Bucket) (bucketCapabilities, error) {
   187  	if bucket.Repo == "" || bucket.Commit == "" {
   188  		return bucketCapabilities{}, s2.NoSuchBucketError(r)
   189  	} else if bucket == d.outputBucket {
   190  		return bucketCapabilities{
   191  			readable:         false,
   192  			writable:         true,
   193  			historicVersions: false,
   194  		}, nil
   195  	}
   196  	return bucketCapabilities{
   197  		readable:         true,
   198  		writable:         false,
   199  		historicVersions: false,
   200  	}, nil
   201  }
   202  
// canModifyBuckets always returns false for the worker driver.
func (d *WorkerDriver) canModifyBuckets() bool {
	return false
}