github.com/pachyderm/pachyderm@v1.13.4/src/server/pfs/s3/multipart.go (about)

     1  package s3
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"net/http"
     7  	"path"
     8  	"regexp"
     9  	"strconv"
    10  	"strings"
    11  	"time"
    12  
    13  	"github.com/gogo/protobuf/types"
    14  	"github.com/pachyderm/pachyderm/src/client"
    15  	pfsClient "github.com/pachyderm/pachyderm/src/client/pfs"
    16  	"github.com/pachyderm/pachyderm/src/client/pkg/errors"
    17  	pfsServer "github.com/pachyderm/pachyderm/src/server/pfs"
    18  	"github.com/pachyderm/pachyderm/src/server/pkg/errutil"
    19  	"github.com/pachyderm/pachyderm/src/server/pkg/uuid"
    20  
    21  	"github.com/pachyderm/s2"
    22  )
    23  
// multipartChunkPathMatcher parses multipart chunk file paths of the form
// <repo>/<branch>/<key>/<uploadID>/<partNumber>. Capture groups: repo,
// branch, key (greedy — the key may itself contain slashes), uploadID, and
// the numeric part number. The pattern is deliberately unanchored so paths
// with a leading slash still match.
var multipartChunkPathMatcher = regexp.MustCompile(`([^/]+)/([^/]+)/(.+)/([^/]+)/(\d+)`)

// multipartKeepPathMatcher parses multipart ".keep" marker file paths of the
// form <repo>/<branch>/<key>/<uploadID>/.keep. Capture groups mirror
// multipartChunkPathMatcher, minus the part number.
var multipartKeepPathMatcher = regexp.MustCompile(`([^/]+)/([^/]+)/(.+)/([^/]+)/\.keep`)
    26  
    27  func multipartChunkArgs(path string) (repo string, branch string, key string, uploadID string, partNumber int, err error) {
    28  	match := multipartChunkPathMatcher.FindStringSubmatch(path)
    29  
    30  	if len(match) == 0 {
    31  		err = errors.New("invalid file path found in multipath bucket")
    32  		return
    33  	}
    34  
    35  	repo = match[1]
    36  	branch = match[2]
    37  	key = match[3]
    38  	uploadID = match[4]
    39  	partNumber, err = strconv.Atoi(match[5])
    40  	if err != nil {
    41  		err = errors.Wrapf(err, "invalid file path found in multipath bucket")
    42  		return
    43  	}
    44  	return
    45  }
    46  
    47  func multipartKeepArgs(path string) (repo string, branch string, key string, uploadID string, err error) {
    48  	match := multipartKeepPathMatcher.FindStringSubmatch(path)
    49  
    50  	if len(match) == 0 {
    51  		err = errors.New("invalid file path found in multipath bucket")
    52  		return
    53  	}
    54  
    55  	repo = match[1]
    56  	branch = match[2]
    57  	key = match[3]
    58  	uploadID = match[4]
    59  	return
    60  }
    61  
    62  func parentDirPath(repo, branch, key, uploadID string) string {
    63  	return path.Join(repo, branch, key, uploadID)
    64  }
    65  
    66  func chunkPath(repo, branch, key, uploadID string, partNumber int) string {
    67  	return path.Join(parentDirPath(repo, branch, key, uploadID), strconv.Itoa(partNumber))
    68  }
    69  
    70  func keepPath(repo, branch, key, uploadID string) string {
    71  	return path.Join(parentDirPath(repo, branch, key, uploadID), ".keep")
    72  }
    73  
    74  func (c *controller) ensureRepo(pc *client.APIClient) error {
    75  	_, err := pc.InspectBranch(c.repo, "master")
    76  	if err != nil {
    77  		err = pc.UpdateRepo(c.repo)
    78  		if err != nil {
    79  			return err
    80  		}
    81  
    82  		err = pc.CreateBranch(c.repo, "master", "", nil)
    83  		if err != nil {
    84  			return err
    85  		}
    86  	}
    87  
    88  	return nil
    89  }
    90  
    91  func (c *controller) ListMultipart(r *http.Request, bucketName, keyMarker, uploadIDMarker string, maxUploads int) (*s2.ListMultipartResult, error) {
    92  	c.logger.Debugf("ListMultipart: bucketName=%+v, keyMarker=%+v, uploadIDMarker=%+v, maxUploads=%+v", bucketName, keyMarker, uploadIDMarker, maxUploads)
    93  
    94  	pc, err := c.requestClient(r)
    95  	if err != nil {
    96  		return nil, err
    97  	}
    98  
    99  	if err = c.ensureRepo(pc); err != nil {
   100  		return nil, err
   101  	}
   102  
   103  	bucket, err := c.driver.bucket(pc, r, bucketName)
   104  	if err != nil {
   105  		return nil, err
   106  	}
   107  
   108  	result := s2.ListMultipartResult{
   109  		Uploads: []*s2.Upload{},
   110  	}
   111  
   112  	globPattern := path.Join(bucket.Repo, bucket.Commit, "*", "*", ".keep")
   113  	err = pc.GlobFileF(c.repo, "master", globPattern, func(fileInfo *pfsClient.FileInfo) error {
   114  		_, _, key, uploadID, err := multipartKeepArgs(fileInfo.File.Path)
   115  		if err != nil {
   116  			return nil
   117  		}
   118  
   119  		if key <= keyMarker || uploadID <= uploadIDMarker {
   120  			return nil
   121  		}
   122  
   123  		if len(result.Uploads) >= maxUploads {
   124  			if maxUploads > 0 {
   125  				result.IsTruncated = true
   126  			}
   127  			return errutil.ErrBreak
   128  		}
   129  
   130  		timestamp, err := types.TimestampFromProto(fileInfo.Committed)
   131  		if err != nil {
   132  			return err
   133  		}
   134  
   135  		result.Uploads = append(result.Uploads, &s2.Upload{
   136  			Key:          key,
   137  			UploadID:     uploadID,
   138  			Initiator:    defaultUser,
   139  			StorageClass: globalStorageClass,
   140  			Initiated:    timestamp,
   141  		})
   142  
   143  		return nil
   144  	})
   145  
   146  	return &result, err
   147  }
   148  
   149  func (c *controller) InitMultipart(r *http.Request, bucketName, key string) (string, error) {
   150  	c.logger.Debugf("InitMultipart: bucketName=%+v, key=%+v", bucketName, key)
   151  
   152  	pc, err := c.requestClient(r)
   153  	if err != nil {
   154  		return "", err
   155  	}
   156  
   157  	if err = c.ensureRepo(pc); err != nil {
   158  		return "", err
   159  	}
   160  
   161  	bucket, err := c.driver.bucket(pc, r, bucketName)
   162  	if err != nil {
   163  		return "", err
   164  	}
   165  	bucketCaps, err := c.driver.bucketCapabilities(pc, r, bucket)
   166  	if err != nil {
   167  		return "", err
   168  	}
   169  	if !bucketCaps.writable {
   170  		return "", s2.NotImplementedError(r)
   171  	}
   172  
   173  	uploadID := uuid.NewWithoutDashes()
   174  
   175  	_, err = pc.PutFileOverwrite(c.repo, "master", keepPath(bucket.Repo, bucket.Commit, key, uploadID), strings.NewReader(""), 0)
   176  	if err != nil {
   177  		return "", err
   178  	}
   179  
   180  	return uploadID, nil
   181  }
   182  
   183  func (c *controller) AbortMultipart(r *http.Request, bucketName, key, uploadID string) (retErr error) {
   184  	c.logger.Infof("AbortMultipart: bucketName=%+v, key=%+v, uploadID=%+v", bucketName, key, uploadID)
   185  	defer func(start time.Time) {
   186  		c.logger.Infof("AbortMultipart: duration=%v, error=%v", time.Since(start), retErr)
   187  	}(time.Now())
   188  	pc, err := c.requestClient(r)
   189  	if err != nil {
   190  		return err
   191  	}
   192  
   193  	if err = c.ensureRepo(pc); err != nil {
   194  		return err
   195  	}
   196  
   197  	bucket, err := c.driver.bucket(pc, r, bucketName)
   198  	if err != nil {
   199  		return err
   200  	}
   201  
   202  	_, err = pc.InspectFile(c.repo, "master", keepPath(bucket.Repo, bucket.Commit, key, uploadID))
   203  	if err != nil {
   204  		return s2.NoSuchUploadError(r)
   205  	}
   206  
   207  	err = pc.DeleteFile(c.repo, "master", parentDirPath(bucket.Repo, bucket.Commit, key, uploadID))
   208  	if err != nil {
   209  		return s2.InternalError(r, err)
   210  	}
   211  
   212  	return nil
   213  }
   214  
// CompleteMultipart finishes a multipart upload: it validates each listed
// part (existence, ETag, minimum size), concatenates the parts in the given
// order into the object at `key`, and deletes the upload's scratch
// directory. Returns the completed object's location, ETag, and version.
func (c *controller) CompleteMultipart(r *http.Request, bucketName, key, uploadID string, parts []*s2.Part) (res *s2.CompleteMultipartResult, retErr error) {
	c.logger.Debugf("CompleteMultipart: bucketName=%+v, key=%+v, uploadID=%+v, parts=%+v", bucketName, key, uploadID, parts)
	defer func(start time.Time) {
		c.logger.Infof("CompleteMultipart: duration=%v, result=%+v, error=%v", time.Since(start), res, retErr)
	}(time.Now())
	pc, err := c.requestClient(r)
	if err != nil {
		return nil, err
	}

	if err = c.ensureRepo(pc); err != nil {
		return nil, err
	}

	bucket, err := c.driver.bucket(pc, r, bucketName)
	if err != nil {
		return nil, err
	}
	bucketCaps, err := c.driver.bucketCapabilities(pc, r, bucket)
	if err != nil {
		return nil, err
	}
	if !bucketCaps.writable {
		return nil, s2.NotImplementedError(r)
	}

	// The ".keep" marker is the record that this upload ID was initialized.
	_, err = pc.InspectFile(c.repo, "master", keepPath(bucket.Repo, bucket.Commit, key, uploadID))
	if err != nil {
		if pfsServer.IsFileNotFoundErr(err) {
			return nil, s2.NoSuchUploadError(r)
		}
		return nil, err
	}

	// S3 "supports" concurrent complete calls on the same upload ID.
	// Write to a random file ID in our directory to avoid conflict
	dstPath := uuid.NewWithoutDashes()

	for i, part := range parts {
		srcPath := chunkPath(bucket.Repo, bucket.Commit, key, uploadID, part.PartNumber)

		fileInfo, err := pc.InspectFile(c.repo, "master", srcPath)
		if err != nil {
			if pfsServer.IsFileNotFoundErr(err) {
				// A listed part was never uploaded (or was already removed).
				return nil, s2.InvalidPartError(r)
			}
			return nil, err
		}

		// Only verify the ETag when it's of the same length as PFS file
		// hashes. This is because s3 clients will generally use md5 for
		// ETags, and would otherwise fail.
		expectedETag := fmt.Sprintf("%x", fileInfo.Hash)
		if len(part.ETag) == len(expectedETag) && part.ETag != expectedETag {
			return nil, s2.InvalidPartError(r)
		}

		if i < len(parts)-1 && fileInfo.SizeBytes < 5*1024*1024 {
			// each part, except for the last, is expected to be at least 5mb
			// in s3
			return nil, s2.EntityTooSmallError(r)
		}

		// overwrite=false: each part is appended to the scratch file in
		// order, building the final object by concatenation.
		if err := pc.CopyFile(c.repo, "master", srcPath, c.repo, "master", dstPath, false); err != nil {
			return nil, err
		}
	}

	// overwrite file, for "last write wins" behavior
	if err := pc.CopyFile(c.repo, "master", dstPath, bucket.Repo, bucket.Commit, key, true); err != nil {
		if errutil.IsWriteToOutputBranchError(err) {
			return nil, writeToOutputBranchError(r)
		}
		return nil, err
	}

	// Delete the upload's scratch directory (chunks plus ".keep").
	// NOTE(review): dstPath itself is left behind at the root of c.repo —
	// presumably harmless scratch data; confirm whether it should also be
	// cleaned up here.
	if err := pc.DeleteFile(c.repo, "master", parentDirPath(bucket.Repo, bucket.Commit, key, uploadID)); err != nil {
		return nil, err
	}

	// An unfinished output commit only means we can't report ETag/version
	// yet; any other inspect error is fatal.
	fileInfo, err := pc.InspectFile(bucket.Repo, bucket.Commit, key)
	if err != nil && !pfsServer.IsOutputCommitNotFinishedErr(err) {
		return nil, err
	}

	result := s2.CompleteMultipartResult{Location: globalLocation}
	if fileInfo != nil {
		result.ETag = fmt.Sprintf("%x", fileInfo.Hash)
		result.Version = fileInfo.File.Commit.ID
	}

	return &result, nil
}
   308  
   309  func (c *controller) ListMultipartChunks(r *http.Request, bucketName, key, uploadID string, partNumberMarker, maxParts int) (*s2.ListMultipartChunksResult, error) {
   310  	c.logger.Debugf("ListMultipartChunks: bucketName=%+v, key=%+v, uploadID=%+v, partNumberMarker=%+v, maxParts=%+v", bucketName, key, uploadID, partNumberMarker, maxParts)
   311  
   312  	pc, err := c.requestClient(r)
   313  	if err != nil {
   314  		return nil, err
   315  	}
   316  
   317  	if err = c.ensureRepo(pc); err != nil {
   318  		return nil, err
   319  	}
   320  
   321  	bucket, err := c.driver.bucket(pc, r, bucketName)
   322  	if err != nil {
   323  		return nil, err
   324  	}
   325  
   326  	result := s2.ListMultipartChunksResult{
   327  		Initiator:    &defaultUser,
   328  		Owner:        &defaultUser,
   329  		StorageClass: globalStorageClass,
   330  		Parts:        []*s2.Part{},
   331  	}
   332  
   333  	globPattern := path.Join(parentDirPath(bucket.Repo, bucket.Commit, key, uploadID), "*")
   334  	err = pc.GlobFileF(c.repo, "master", globPattern, func(fileInfo *pfsClient.FileInfo) error {
   335  		_, _, _, _, partNumber, err := multipartChunkArgs(fileInfo.File.Path)
   336  		if err != nil {
   337  			return nil
   338  		}
   339  
   340  		if partNumber <= partNumberMarker {
   341  			return nil
   342  		}
   343  
   344  		if len(result.Parts) >= maxParts {
   345  			if maxParts > 0 {
   346  				result.IsTruncated = true
   347  			}
   348  			return errutil.ErrBreak
   349  		}
   350  
   351  		result.Parts = append(result.Parts, &s2.Part{
   352  			PartNumber: partNumber,
   353  			ETag:       fmt.Sprintf("%x", fileInfo.Hash),
   354  		})
   355  
   356  		return nil
   357  	})
   358  
   359  	return &result, err
   360  }
   361  
   362  func (c *controller) UploadMultipartChunk(r *http.Request, bucketName, key, uploadID string, partNumber int, reader io.Reader) (string, error) {
   363  	c.logger.Debugf("UploadMultipartChunk: bucketName=%+v, key=%+v, uploadID=%+v partNumber=%+v", bucketName, key, uploadID, partNumber)
   364  
   365  	pc, err := c.requestClient(r)
   366  	if err != nil {
   367  		return "", err
   368  	}
   369  
   370  	if err = c.ensureRepo(pc); err != nil {
   371  		return "", err
   372  	}
   373  
   374  	bucket, err := c.driver.bucket(pc, r, bucketName)
   375  	if err != nil {
   376  		return "", err
   377  	}
   378  
   379  	_, err = pc.InspectFile(c.repo, "master", keepPath(bucket.Repo, bucket.Commit, key, uploadID))
   380  	if err != nil {
   381  		if pfsServer.IsFileNotFoundErr(err) {
   382  			return "", s2.NoSuchUploadError(r)
   383  		}
   384  		return "", err
   385  	}
   386  
   387  	path := chunkPath(bucket.Repo, bucket.Commit, key, uploadID, partNumber)
   388  	_, err = pc.PutFileOverwrite(c.repo, "master", path, reader, 0)
   389  	if err != nil {
   390  		return "", err
   391  	}
   392  
   393  	fileInfo, err := pc.InspectFile(c.repo, "master", path)
   394  	if err != nil {
   395  		return "", err
   396  	}
   397  
   398  	return fmt.Sprintf("%x", fileInfo.Hash), nil
   399  }