github.com/pachyderm/pachyderm@v1.13.4/src/server/pfs/s3/multipart.go (about) 1 package s3 2 3 import ( 4 "fmt" 5 "io" 6 "net/http" 7 "path" 8 "regexp" 9 "strconv" 10 "strings" 11 "time" 12 13 "github.com/gogo/protobuf/types" 14 "github.com/pachyderm/pachyderm/src/client" 15 pfsClient "github.com/pachyderm/pachyderm/src/client/pfs" 16 "github.com/pachyderm/pachyderm/src/client/pkg/errors" 17 pfsServer "github.com/pachyderm/pachyderm/src/server/pfs" 18 "github.com/pachyderm/pachyderm/src/server/pkg/errutil" 19 "github.com/pachyderm/pachyderm/src/server/pkg/uuid" 20 21 "github.com/pachyderm/s2" 22 ) 23 24 var multipartChunkPathMatcher = regexp.MustCompile(`([^/]+)/([^/]+)/(.+)/([^/]+)/(\d+)`) 25 var multipartKeepPathMatcher = regexp.MustCompile(`([^/]+)/([^/]+)/(.+)/([^/]+)/\.keep`) 26 27 func multipartChunkArgs(path string) (repo string, branch string, key string, uploadID string, partNumber int, err error) { 28 match := multipartChunkPathMatcher.FindStringSubmatch(path) 29 30 if len(match) == 0 { 31 err = errors.New("invalid file path found in multipath bucket") 32 return 33 } 34 35 repo = match[1] 36 branch = match[2] 37 key = match[3] 38 uploadID = match[4] 39 partNumber, err = strconv.Atoi(match[5]) 40 if err != nil { 41 err = errors.Wrapf(err, "invalid file path found in multipath bucket") 42 return 43 } 44 return 45 } 46 47 func multipartKeepArgs(path string) (repo string, branch string, key string, uploadID string, err error) { 48 match := multipartKeepPathMatcher.FindStringSubmatch(path) 49 50 if len(match) == 0 { 51 err = errors.New("invalid file path found in multipath bucket") 52 return 53 } 54 55 repo = match[1] 56 branch = match[2] 57 key = match[3] 58 uploadID = match[4] 59 return 60 } 61 62 func parentDirPath(repo, branch, key, uploadID string) string { 63 return path.Join(repo, branch, key, uploadID) 64 } 65 66 func chunkPath(repo, branch, key, uploadID string, partNumber int) string { 67 return path.Join(parentDirPath(repo, branch, key, uploadID), strconv.Itoa(partNumber)) 68 } 69 70 func keepPath(repo, branch, key, uploadID string) string { 71 return path.Join(parentDirPath(repo, branch, key, uploadID), ".keep") 72 } 73 74 func (c *controller) ensureRepo(pc *client.APIClient) error { 75 _, err := pc.InspectBranch(c.repo, "master") 76 if err != nil { 77 err = pc.UpdateRepo(c.repo) 78 if err != nil { 79 return err 80 } 81 82 err = pc.CreateBranch(c.repo, "master", "", nil) 83 if err != nil { 84 return err 85 } 86 } 87 88 return nil 89 } 90 91 func (c *controller) ListMultipart(r *http.Request, bucketName, keyMarker, uploadIDMarker string, maxUploads int) (*s2.ListMultipartResult, error) { 92 c.logger.Debugf("ListMultipart: bucketName=%+v, keyMarker=%+v, uploadIDMarker=%+v, maxUploads=%+v", bucketName, keyMarker, uploadIDMarker, maxUploads) 93 94 pc, err := c.requestClient(r) 95 if err != nil { 96 return nil, err 97 } 98 99 if err = c.ensureRepo(pc); err != nil { 100 return nil, err 101 } 102 103 bucket, err := c.driver.bucket(pc, r, bucketName) 104 if err != nil { 105 return nil, err 106 } 107 108 result := s2.ListMultipartResult{ 109 Uploads: []*s2.Upload{}, 110 } 111 112 globPattern := path.Join(bucket.Repo, bucket.Commit, "*", "*", ".keep") 113 err = pc.GlobFileF(c.repo, "master", globPattern, func(fileInfo *pfsClient.FileInfo) error { 114 _, _, key, uploadID, err := multipartKeepArgs(fileInfo.File.Path) 115 if err != nil { 116 return nil 117 } 118 119 if key <= keyMarker || uploadID <= uploadIDMarker { 120 return nil 121 } 122 123 if len(result.Uploads) >= maxUploads { 124 if maxUploads > 0 { 125 result.IsTruncated = true 126 } 127 return errutil.ErrBreak 128 } 129 130 timestamp, err := types.TimestampFromProto(fileInfo.Committed) 131 if err != nil { 132 return err 133 } 134 135 result.Uploads = append(result.Uploads, &s2.Upload{ 136 Key: key, 137 UploadID: uploadID, 138 Initiator: defaultUser, 139 StorageClass: globalStorageClass, 140 Initiated: timestamp, 141 }) 142 143 return nil 144 }) 145 146 return &result, err 147 } 148 149 func (c *controller) InitMultipart(r *http.Request, bucketName, key string) (string, error) { 150 c.logger.Debugf("InitMultipart: bucketName=%+v, key=%+v", bucketName, key) 151 152 pc, err := c.requestClient(r) 153 if err != nil { 154 return "", err 155 } 156 157 if err = c.ensureRepo(pc); err != nil { 158 return "", err 159 } 160 161 bucket, err := c.driver.bucket(pc, r, bucketName) 162 if err != nil { 163 return "", err 164 } 165 bucketCaps, err := c.driver.bucketCapabilities(pc, r, bucket) 166 if err != nil { 167 return "", err 168 } 169 if !bucketCaps.writable { 170 return "", s2.NotImplementedError(r) 171 } 172 173 uploadID := uuid.NewWithoutDashes() 174 175 _, err = pc.PutFileOverwrite(c.repo, "master", keepPath(bucket.Repo, bucket.Commit, key, uploadID), strings.NewReader(""), 0) 176 if err != nil { 177 return "", err 178 } 179 180 return uploadID, nil 181 } 182 183 func (c *controller) AbortMultipart(r *http.Request, bucketName, key, uploadID string) (retErr error) { 184 c.logger.Infof("AbortMultipart: bucketName=%+v, key=%+v, uploadID=%+v", bucketName, key, uploadID) 185 defer func(start time.Time) { 186 c.logger.Infof("AbortMultipart: duration=%v, error=%v", time.Since(start), retErr) 187 }(time.Now()) 188 pc, err := c.requestClient(r) 189 if err != nil { 190 return err 191 } 192 193 if err = c.ensureRepo(pc); err != nil { 194 return err 195 } 196 197 bucket, err := c.driver.bucket(pc, r, bucketName) 198 if err != nil { 199 return err 200 } 201 202 _, err = pc.InspectFile(c.repo, "master", keepPath(bucket.Repo, bucket.Commit, key, uploadID)) 203 if err != nil { 204 return s2.NoSuchUploadError(r) 205 } 206 207 err = pc.DeleteFile(c.repo, "master", parentDirPath(bucket.Repo, bucket.Commit, key, uploadID)) 208 if err != nil { 209 return s2.InternalError(r, err) 210 } 211 212 return nil 213 } 214 215 func (c *controller) CompleteMultipart(r *http.Request, bucketName, key, uploadID string, parts []*s2.Part) (res *s2.CompleteMultipartResult, retErr error) { 216 c.logger.Debugf("CompleteMultipart: bucketName=%+v, key=%+v, uploadID=%+v, parts=%+v", bucketName, key, uploadID, parts) 217 defer func(start time.Time) { 218 c.logger.Infof("CompleteMultipart: duration=%v, result=%+v, error=%v", time.Since(start), res, retErr) 219 }(time.Now()) 220 pc, err := c.requestClient(r) 221 if err != nil { 222 return nil, err 223 } 224 225 if err = c.ensureRepo(pc); err != nil { 226 return nil, err 227 } 228 229 bucket, err := c.driver.bucket(pc, r, bucketName) 230 if err != nil { 231 return nil, err 232 } 233 bucketCaps, err := c.driver.bucketCapabilities(pc, r, bucket) 234 if err != nil { 235 return nil, err 236 } 237 if !bucketCaps.writable { 238 return nil, s2.NotImplementedError(r) 239 } 240 241 _, err = pc.InspectFile(c.repo, "master", keepPath(bucket.Repo, bucket.Commit, key, uploadID)) 242 if err != nil { 243 if pfsServer.IsFileNotFoundErr(err) { 244 return nil, s2.NoSuchUploadError(r) 245 } 246 return nil, err 247 } 248 249 // S3 "supports" concurrent complete calls on the same upload ID. 250 // Write to a random file ID in our directory to avoid conflict 251 dstPath := uuid.NewWithoutDashes() 252 253 for i, part := range parts { 254 srcPath := chunkPath(bucket.Repo, bucket.Commit, key, uploadID, part.PartNumber) 255 256 fileInfo, err := pc.InspectFile(c.repo, "master", srcPath) 257 if err != nil { 258 if pfsServer.IsFileNotFoundErr(err) { 259 return nil, s2.InvalidPartError(r) 260 } 261 return nil, err 262 } 263 264 // Only verify the ETag when it's of the same length as PFS file 265 // hashes. This is because s3 clients will generally use md5 for 266 // ETags, and would otherwise fail. 267 expectedETag := fmt.Sprintf("%x", fileInfo.Hash) 268 if len(part.ETag) == len(expectedETag) && part.ETag != expectedETag { 269 return nil, s2.InvalidPartError(r) 270 } 271 272 if i < len(parts)-1 && fileInfo.SizeBytes < 5*1024*1024 { 273 // each part, except for the last, is expected to be at least 5mb 274 // in s3 275 return nil, s2.EntityTooSmallError(r) 276 } 277 278 if err := pc.CopyFile(c.repo, "master", srcPath, c.repo, "master", dstPath, false); err != nil { 279 return nil, err 280 } 281 } 282 283 // overwrite file, for "last write wins" behavior 284 if err := pc.CopyFile(c.repo, "master", dstPath, bucket.Repo, bucket.Commit, key, true); err != nil { 285 if errutil.IsWriteToOutputBranchError(err) { 286 return nil, writeToOutputBranchError(r) 287 } 288 return nil, err 289 } 290 291 if err := pc.DeleteFile(c.repo, "master", parentDirPath(bucket.Repo, bucket.Commit, key, uploadID)); err != nil { 292 return nil, err 293 } 294 295 fileInfo, err := pc.InspectFile(bucket.Repo, bucket.Commit, key) 296 if err != nil && !pfsServer.IsOutputCommitNotFinishedErr(err) { 297 return nil, err 298 } 299 300 result := s2.CompleteMultipartResult{Location: globalLocation} 301 if fileInfo != nil { 302 result.ETag = fmt.Sprintf("%x", fileInfo.Hash) 303 result.Version = fileInfo.File.Commit.ID 304 } 305 306 return &result, nil 307 } 308 309 func (c *controller) ListMultipartChunks(r *http.Request, bucketName, key, uploadID string, partNumberMarker, maxParts int) (*s2.ListMultipartChunksResult, error) { 310 c.logger.Debugf("ListMultipartChunks: bucketName=%+v, key=%+v, uploadID=%+v, partNumberMarker=%+v, maxParts=%+v", bucketName, key, uploadID, partNumberMarker, maxParts) 311 312 pc, err := c.requestClient(r) 313 if err != nil { 314 return nil, err 315 } 316 317 if err = c.ensureRepo(pc); err != nil { 318 return nil, err 319 } 320 321 bucket, err := c.driver.bucket(pc, r, bucketName) 322 if err != nil { 323 return nil, err 324 } 325 326 result := s2.ListMultipartChunksResult{ 327 Initiator: &defaultUser, 328 Owner: &defaultUser, 329 StorageClass: globalStorageClass, 330 Parts: []*s2.Part{}, 331 } 332 333 globPattern := path.Join(parentDirPath(bucket.Repo, bucket.Commit, key, uploadID), "*") 334 err = pc.GlobFileF(c.repo, "master", globPattern, func(fileInfo *pfsClient.FileInfo) error { 335 _, _, _, _, partNumber, err := multipartChunkArgs(fileInfo.File.Path) 336 if err != nil { 337 return nil 338 } 339 340 if partNumber <= partNumberMarker { 341 return nil 342 } 343 344 if len(result.Parts) >= maxParts { 345 if maxParts > 0 { 346 result.IsTruncated = true 347 } 348 return errutil.ErrBreak 349 } 350 351 result.Parts = append(result.Parts, &s2.Part{ 352 PartNumber: partNumber, 353 ETag: fmt.Sprintf("%x", fileInfo.Hash), 354 }) 355 356 return nil 357 }) 358 359 return &result, err 360 } 361 362 func (c *controller) UploadMultipartChunk(r *http.Request, bucketName, key, uploadID string, partNumber int, reader io.Reader) (string, error) { 363 c.logger.Debugf("UploadMultipartChunk: bucketName=%+v, key=%+v, uploadID=%+v partNumber=%+v", bucketName, key, uploadID, partNumber) 364 365 pc, err := c.requestClient(r) 366 if err != nil { 367 return "", err 368 } 369 370 if err = c.ensureRepo(pc); err != nil { 371 return "", err 372 } 373 374 bucket, err := c.driver.bucket(pc, r, bucketName) 375 if err != nil { 376 return "", err 377 } 378 379 _, err = pc.InspectFile(c.repo, "master", keepPath(bucket.Repo, bucket.Commit, key, uploadID)) 380 if err != nil { 381 if pfsServer.IsFileNotFoundErr(err) { 382 return "", s2.NoSuchUploadError(r) 383 } 384 return "", err 385 } 386 387 path := chunkPath(bucket.Repo, bucket.Commit, key, uploadID, partNumber) 388 _, err = pc.PutFileOverwrite(c.repo, "master", path, reader, 0) 389 if err != nil { 390 return "", err 391 } 392 393 fileInfo, err := pc.InspectFile(c.repo, "master", path) 394 if err != nil { 395 return "", err 396 } 397 398 return fmt.Sprintf("%x", fileInfo.Hash), nil 399 }