github.com/pachyderm/pachyderm@v1.13.4/src/server/pfs/s3/s3.go (about)

     1  package s3
     2  
     3  import (
     4  	"fmt"
     5  	stdlog "log"
     6  	"net/http"
     7  	"time"
     8  
     9  	"github.com/gorilla/mux"
    10  	"github.com/pachyderm/pachyderm/src/client"
    11  	"github.com/pachyderm/pachyderm/src/server/pkg/serviceenv"
    12  
    13  	"github.com/pachyderm/s2"
    14  	"github.com/sirupsen/logrus"
    15  )
    16  
// ClientFactory is the signature of a function called by s3g to create
// request-scoped pachyderm clients. It returns either a ready-to-use client
// or an error.
//
// NOTE(review): nothing in the visible part of this file references
// ClientFactory; requestClient obtains its client from the service
// environment instead. Confirm whether other files still depend on it.
type ClientFactory = func() (*client.APIClient, error)
    20  
const (
	// Gateway-wide settings:
	//   - multipartRepo: name of the PFS repo holding multipart content
	//     (Server stores it in controller.repo).
	//   - maxAllowedParts: the maximum number of parts that can be associated
	//     with any given file (Server stores it in controller.maxAllowedParts).
	//   - maxRequestBodyLength: 128 MiB. NOTE(review): not referenced in the
	//     visible part of this file; Server passes a literal 0 as the second
	//     argument of s2.NewS2 - confirm whether this constant is still used.
	//   - requestTimeout: used as both the read and the write timeout of the
	//     HTTP server built by Server.
	//   - readBodyTimeout: half of requestTimeout; passed to s2.NewS2.
	multipartRepo        = "_s3gateway_multipart_"
	maxAllowedParts      = 10000
	maxRequestBodyLength = 128 * 1024 * 1024 // 128 MiB
	requestTimeout       = 20 * time.Minute
	readBodyTimeout      = requestTimeout / 2

	// The S3 storage class that all PFS content will be reported to be stored in
	globalStorageClass = "STANDARD"

	// The S3 location served back
	globalLocation = "PACHYDERM"
)
    34  
// defaultUser is the S3 user associated with all PFS content. It has a fixed
// all-zero ID and the display name "pachyderm".
var defaultUser = s2.User{ID: "00000000000000000000000000000000", DisplayName: "pachyderm"}
    37  
// controller carries the state shared by the s3 gateway's request handlers.
// Server creates a single controller and installs it as the Auth, Service,
// Bucket, Object and Multipart implementation of the s2 server.
type controller struct {
	// app environment -- clients to other services, env vars, etc
	env *serviceenv.ServiceEnv

	// custom logger
	logger *logrus.Entry

	// name of the PFS repo holding multipart content
	repo string

	// the maximum number of allowed parts that can be associated with any
	// given file
	maxAllowedParts int

	// driver passed in by the caller of Server.
	// NOTE(review): Driver is declared elsewhere in the package; its exact
	// responsibilities cannot be determined from this file.
	driver Driver
}
    54  
    55  // requestPachClient uses the clientFactory to construct a request-scoped
    56  // pachyderm client
    57  func (c *controller) requestClient(r *http.Request) (*client.APIClient, error) {
    58  	pc := c.env.GetPachClient(r.Context())
    59  
    60  	vars := mux.Vars(r)
    61  	if vars["s3gAuth"] != "disabled" {
    62  		accessKey := vars["authAccessKey"]
    63  		if accessKey != "" {
    64  			pc.SetAuthToken(accessKey)
    65  		}
    66  	}
    67  
    68  	return pc, nil
    69  }
    70  
    71  // Server runs an HTTP server with an S3-like API for PFS. This allows you to
    72  // use s3 clients to access PFS contents.
    73  //
    74  // `inputBuckets` specifies which buckets should be served, referencing
    75  // specific commit IDs. If nil, all PFS branches will be served as separate
    76  // buckets, of the form `<branch name>.<bucket name>`. Some s3 features are
    77  // enabled when all PFS branches are served as well; e.g. we add support for
    78  // some s3 versioning functionality.
    79  //
    80  // This returns an `http.Server` instance. It is the responsibility of the
    81  // caller to start the returned server. It's possible for the caller to
    82  // gracefully shutdown the server if desired; see the `http` package for details.
    83  //
    84  // Note: server errors are redirected to logrus' standard log writer. The log
    85  // writer is never closed. This should not be a problem with logrus' default
    86  // configuration, which just writes to stdio. But if the standard logger is
    87  // overwritten (e.g. to write to a socket), it's possible for this to cause
    88  // problems.
    89  //
    90  // Note: In `s3cmd`, you must set the access key and secret key, even though
    91  // this API will ignore them - otherwise, you'll get an opaque config error:
    92  // https://github.com/s3tools/s3cmd/issues/845#issuecomment-464885959
    93  func Server(env *serviceenv.ServiceEnv, driver Driver) (*http.Server, error) {
    94  	logger := logrus.WithFields(logrus.Fields{
    95  		"source": "s3gateway",
    96  	})
    97  
    98  	c := &controller{
    99  		env:             env,
   100  		logger:          logger,
   101  		repo:            multipartRepo,
   102  		maxAllowedParts: maxAllowedParts,
   103  		driver:          driver,
   104  	}
   105  
   106  	s3Server := s2.NewS2(logger, 0, readBodyTimeout)
   107  	s3Server.Auth = c
   108  	s3Server.Service = c
   109  	s3Server.Bucket = c
   110  	s3Server.Object = c
   111  	s3Server.Multipart = c
   112  	router := s3Server.Router()
   113  
   114  	server := &http.Server{
   115  		Addr:         fmt.Sprintf(":%d", env.S3GatewayPort),
   116  		ReadTimeout:  requestTimeout,
   117  		WriteTimeout: requestTimeout,
   118  		Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   119  			// Log that a request was made
   120  			logger.Infof("http request: %s %s", r.Method, r.RequestURI)
   121  			router.ServeHTTP(w, r)
   122  		}),
   123  		// NOTE: this is not closed. If the standard logger gets customized, this will need to be fixed
   124  		ErrorLog: stdlog.New(logger.Writer(), "", 0),
   125  	}
   126  
   127  	return server, nil
   128  }