github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/block/local/adapter.go (about) 1 package local 2 3 import ( 4 "context" 5 "crypto/md5" //nolint:gosec 6 "encoding/hex" 7 "errors" 8 "fmt" 9 "io" 10 "net/http" 11 "net/url" 12 "os" 13 "path" 14 "path/filepath" 15 "sort" 16 "strconv" 17 "strings" 18 "time" 19 20 "github.com/google/uuid" 21 "github.com/treeverse/lakefs/pkg/block" 22 "github.com/treeverse/lakefs/pkg/block/params" 23 "golang.org/x/exp/slices" 24 ) 25 26 const DefaultNamespacePrefix = block.BlockstoreTypeLocal + "://" 27 28 type Adapter struct { 29 path string 30 removeEmptyDir bool 31 allowedExternalPrefixes []string 32 importEnabled bool 33 } 34 35 var ( 36 ErrPathNotWritable = errors.New("path provided is not writable") 37 ErrInvalidUploadIDFormat = errors.New("invalid upload id format") 38 ErrBadPath = errors.New("bad path traversal blocked") 39 ) 40 41 type QualifiedKey struct { 42 block.CommonQualifiedKey 43 path string 44 } 45 46 func (qk QualifiedKey) Format() string { 47 p := path.Join(qk.path, qk.GetStorageNamespace(), qk.GetKey()) 48 return qk.GetStorageType().Scheme() + "://" + p 49 } 50 51 func (qk QualifiedKey) GetStorageType() block.StorageType { 52 return qk.CommonQualifiedKey.GetStorageType() 53 } 54 55 func (qk QualifiedKey) GetStorageNamespace() string { 56 return qk.CommonQualifiedKey.GetStorageNamespace() 57 } 58 59 func (qk QualifiedKey) GetKey() string { 60 return qk.CommonQualifiedKey.GetKey() 61 } 62 63 func WithAllowedExternalPrefixes(prefixes []string) func(a *Adapter) { 64 return func(a *Adapter) { 65 a.allowedExternalPrefixes = prefixes 66 } 67 } 68 69 func WithImportEnabled(b bool) func(a *Adapter) { 70 return func(a *Adapter) { 71 a.importEnabled = b 72 } 73 } 74 75 func WithRemoveEmptyDir(b bool) func(a *Adapter) { 76 return func(a *Adapter) { 77 a.removeEmptyDir = b 78 } 79 } 80 81 func NewAdapter(path string, opts ...func(a *Adapter)) (*Adapter, error) { 82 // Clean() the path so that misconfiguration does not allow path traversal. 83 path = filepath.Clean(path) 84 err := os.MkdirAll(path, 0o700) //nolint: mnd 85 if err != nil { 86 return nil, err 87 } 88 if !isDirectoryWritable(path) { 89 return nil, ErrPathNotWritable 90 } 91 localAdapter := &Adapter{ 92 path: path, 93 removeEmptyDir: true, 94 } 95 for _, opt := range opts { 96 opt(localAdapter) 97 } 98 return localAdapter, nil 99 } 100 101 func (l *Adapter) GetPreSignedURL(_ context.Context, _ block.ObjectPointer, _ block.PreSignMode) (string, time.Time, error) { 102 return "", time.Time{}, fmt.Errorf("local adapter presigned URL: %w", block.ErrOperationNotSupported) 103 } 104 105 // verifyRelPath ensures that p is under the directory controlled by this adapter. It does not 106 // examine the filesystem and can mistakenly error out when symbolic links are involved. 107 func (l *Adapter) verifyRelPath(p string) error { 108 if !strings.HasPrefix(filepath.Clean(p), l.path) { 109 return fmt.Errorf("%s: %w", p, ErrBadPath) 110 } 111 return nil 112 } 113 114 func (l *Adapter) extractParamsFromObj(ptr block.ObjectPointer) (string, error) { 115 if strings.HasPrefix(ptr.Identifier, DefaultNamespacePrefix) { 116 // check abs path 117 p := ptr.Identifier[len(DefaultNamespacePrefix):] 118 if err := VerifyAbsPath(p, l.path, l.allowedExternalPrefixes); err != nil { 119 return "", err 120 } 121 return p, nil 122 } 123 // relative path 124 if !strings.HasPrefix(ptr.StorageNamespace, DefaultNamespacePrefix) { 125 return "", fmt.Errorf("%w: storage namespace", ErrBadPath) 126 } 127 p := path.Join(l.path, ptr.StorageNamespace[len(DefaultNamespacePrefix):], ptr.Identifier) 128 if err := l.verifyRelPath(p); err != nil { 129 return "", err 130 } 131 return p, nil 132 } 133 134 // maybeMkdir verifies path is allowed and runs f(path), but if f fails due to file-not-found 135 // MkdirAll's its dir and then runs it again. 136 func (l *Adapter) maybeMkdir(path string, f func(p string) (*os.File, error)) (*os.File, error) { 137 if err := l.verifyRelPath(path); err != nil { 138 return nil, err 139 } 140 ret, err := f(path) 141 if !errors.Is(err, os.ErrNotExist) { 142 return ret, err 143 } 144 d := filepath.Dir(filepath.Clean(path)) 145 if err = os.MkdirAll(d, 0o750); err != nil { //nolint: mnd 146 return nil, err 147 } 148 return f(path) 149 } 150 151 func (l *Adapter) Path() string { 152 return l.path 153 } 154 155 func (l *Adapter) Put(_ context.Context, obj block.ObjectPointer, _ int64, reader io.Reader, _ block.PutOpts) error { 156 p, err := l.extractParamsFromObj(obj) 157 if err != nil { 158 return err 159 } 160 p = filepath.Clean(p) 161 f, err := l.maybeMkdir(p, os.Create) 162 if err != nil { 163 return err 164 } 165 defer func() { 166 _ = f.Close() 167 }() 168 _, err = io.Copy(f, reader) 169 return err 170 } 171 172 func (l *Adapter) Remove(_ context.Context, obj block.ObjectPointer) error { 173 p, err := l.extractParamsFromObj(obj) 174 if err != nil { 175 return err 176 } 177 p = filepath.Clean(p) 178 err = os.Remove(p) 179 if err != nil { 180 return err 181 } 182 if l.removeEmptyDir { 183 dir := filepath.Dir(p) 184 repoRoot := obj.StorageNamespace[len(DefaultNamespacePrefix):] 185 removeEmptyDirUntil(dir, path.Join(l.path, repoRoot)) 186 } 187 return nil 188 } 189 190 func removeEmptyDirUntil(dir string, stopAt string) { 191 if stopAt == "" { 192 return 193 } 194 if !strings.HasSuffix(stopAt, "/") { 195 stopAt += "/" 196 } 197 for strings.HasPrefix(dir, stopAt) && dir != stopAt { 198 err := os.Remove(dir) 199 if err != nil { 200 break 201 } 202 dir = filepath.Dir(dir) 203 if dir == "/" { 204 break 205 } 206 } 207 } 208 209 func (l *Adapter) Copy(_ context.Context, sourceObj, destinationObj block.ObjectPointer) error { 210 source, err := l.extractParamsFromObj(sourceObj) 211 if err != nil { 212 return err 213 } 214 sourceFile, err := os.Open(filepath.Clean(source)) 215 defer func() { 216 _ = sourceFile.Close() 217 }() 218 if err != nil { 219 return err 220 } 221 dest, err := l.extractParamsFromObj(destinationObj) 222 if err != nil { 223 return err 224 } 225 destinationFile, err := l.maybeMkdir(dest, os.Create) 226 if err != nil { 227 return err 228 } 229 defer func() { 230 _ = destinationFile.Close() 231 }() 232 _, err = io.Copy(destinationFile, sourceFile) 233 return err 234 } 235 236 func (l *Adapter) UploadCopyPart(ctx context.Context, sourceObj, destinationObj block.ObjectPointer, uploadID string, partNumber int) (*block.UploadPartResponse, error) { 237 if err := isValidUploadID(uploadID); err != nil { 238 return nil, err 239 } 240 r, err := l.Get(ctx, sourceObj, 0) 241 if err != nil { 242 return nil, fmt.Errorf("copy get: %w", err) 243 } 244 md5Read := block.NewHashingReader(r, block.HashFunctionMD5) 245 fName := uploadID + fmt.Sprintf("-%05d", partNumber) 246 err = l.Put(ctx, block.ObjectPointer{StorageNamespace: destinationObj.StorageNamespace, Identifier: fName}, -1, md5Read, block.PutOpts{}) 247 if err != nil { 248 return nil, fmt.Errorf("copy put: %w", err) 249 } 250 etag := hex.EncodeToString(md5Read.Md5.Sum(nil)) 251 return &block.UploadPartResponse{ 252 ETag: etag, 253 }, nil 254 } 255 256 func (l *Adapter) UploadCopyPartRange(ctx context.Context, sourceObj, destinationObj block.ObjectPointer, uploadID string, partNumber int, startPosition, endPosition int64) (*block.UploadPartResponse, error) { 257 if err := isValidUploadID(uploadID); err != nil { 258 return nil, err 259 } 260 r, err := l.GetRange(ctx, sourceObj, startPosition, endPosition) 261 if err != nil { 262 return nil, fmt.Errorf("copy range get: %w", err) 263 } 264 md5Read := block.NewHashingReader(r, block.HashFunctionMD5) 265 fName := uploadID + fmt.Sprintf("-%05d", partNumber) 266 err = l.Put(ctx, block.ObjectPointer{StorageNamespace: destinationObj.StorageNamespace, Identifier: fName}, -1, md5Read, block.PutOpts{}) 267 if err != nil { 268 return nil, fmt.Errorf("copy range put: %w", err) 269 } 270 etag := hex.EncodeToString(md5Read.Md5.Sum(nil)) 271 return &block.UploadPartResponse{ 272 ETag: etag, 273 }, err 274 } 275 276 func (l *Adapter) Get(_ context.Context, obj block.ObjectPointer, _ int64) (reader io.ReadCloser, err error) { 277 p, err := l.extractParamsFromObj(obj) 278 if err != nil { 279 return nil, err 280 } 281 f, err := os.OpenFile(filepath.Clean(p), os.O_RDONLY, 0o600) //nolint: mnd 282 if os.IsNotExist(err) { 283 return nil, block.ErrDataNotFound 284 } 285 if err != nil { 286 return nil, err 287 } 288 return f, nil 289 } 290 291 func (l *Adapter) GetWalker(uri *url.URL) (block.Walker, error) { 292 if err := block.ValidateStorageType(uri, block.StorageTypeLocal); err != nil { 293 return nil, err 294 } 295 296 err := VerifyAbsPath(uri.Path, l.path, l.allowedExternalPrefixes) 297 if err != nil { 298 return nil, err 299 } 300 return NewLocalWalker(params.Local{ 301 Path: l.path, 302 ImportEnabled: l.importEnabled, 303 AllowedExternalPrefixes: l.allowedExternalPrefixes, 304 }), nil 305 } 306 307 func (l *Adapter) Exists(_ context.Context, obj block.ObjectPointer) (bool, error) { 308 p, err := l.extractParamsFromObj(obj) 309 if err != nil { 310 return false, err 311 } 312 _, err = os.Stat(p) 313 if err != nil { 314 if os.IsNotExist(err) { 315 return false, nil 316 } 317 return false, err 318 } 319 return true, nil 320 } 321 322 func (l *Adapter) GetRange(_ context.Context, obj block.ObjectPointer, start int64, end int64) (io.ReadCloser, error) { 323 if start < 0 || end < start { 324 return nil, block.ErrBadIndex 325 } 326 p, err := l.extractParamsFromObj(obj) 327 if err != nil { 328 return nil, err 329 } 330 f, err := os.Open(filepath.Clean(p)) 331 if err != nil { 332 if os.IsNotExist(err) { 333 return nil, block.ErrDataNotFound 334 } 335 return nil, err 336 } 337 return &struct { 338 io.Reader 339 io.Closer 340 }{ 341 Reader: io.NewSectionReader(f, start, end-start+1), 342 Closer: f, 343 }, nil 344 } 345 346 func (l *Adapter) GetProperties(_ context.Context, obj block.ObjectPointer) (block.Properties, error) { 347 p, err := l.extractParamsFromObj(obj) 348 if err != nil { 349 return block.Properties{}, err 350 } 351 _, err = os.Stat(p) 352 if err != nil { 353 return block.Properties{}, err 354 } 355 // No properties, just return that it exists 356 return block.Properties{}, nil 357 } 358 359 // isDirectoryWritable tests that pth, which must not be controllable by user input, is a 360 // writable directory. As there is no simple way to test this in windows, I prefer the "brute 361 // force" method of creating s dummy file. Will work in any OS. speed is not an issue, as 362 // this will be activated very few times during startup. 363 func isDirectoryWritable(pth string) bool { 364 f, err := os.CreateTemp(pth, "dummy") 365 if err != nil { 366 return false 367 } 368 _ = f.Close() 369 _ = os.Remove(f.Name()) 370 return true 371 } 372 373 func (l *Adapter) CreateMultiPartUpload(_ context.Context, obj block.ObjectPointer, _ *http.Request, _ block.CreateMultiPartUploadOpts) (*block.CreateMultiPartUploadResponse, error) { 374 if strings.Contains(obj.Identifier, "/") { 375 fullPath, err := l.extractParamsFromObj(obj) 376 if err != nil { 377 return nil, err 378 } 379 fullDir := path.Dir(fullPath) 380 err = os.MkdirAll(fullDir, 0o750) //nolint: mnd 381 if err != nil { 382 return nil, err 383 } 384 } 385 uidBytes := uuid.New() 386 uploadID := hex.EncodeToString(uidBytes[:]) 387 return &block.CreateMultiPartUploadResponse{ 388 UploadID: uploadID, 389 }, nil 390 } 391 392 func (l *Adapter) UploadPart(ctx context.Context, obj block.ObjectPointer, _ int64, reader io.Reader, uploadID string, partNumber int) (*block.UploadPartResponse, error) { 393 if err := isValidUploadID(uploadID); err != nil { 394 return nil, err 395 } 396 md5Read := block.NewHashingReader(reader, block.HashFunctionMD5) 397 fName := uploadID + fmt.Sprintf("-%05d", partNumber) 398 err := l.Put(ctx, block.ObjectPointer{StorageNamespace: obj.StorageNamespace, Identifier: fName}, -1, md5Read, block.PutOpts{}) 399 etag := hex.EncodeToString(md5Read.Md5.Sum(nil)) 400 return &block.UploadPartResponse{ 401 ETag: etag, 402 }, err 403 } 404 405 func (l *Adapter) AbortMultiPartUpload(_ context.Context, obj block.ObjectPointer, uploadID string) error { 406 if err := isValidUploadID(uploadID); err != nil { 407 return err 408 } 409 files, err := l.getPartFiles(uploadID, obj) 410 if err != nil { 411 return err 412 } 413 if err = l.removePartFiles(files); err != nil { 414 return err 415 } 416 return nil 417 } 418 419 func (l *Adapter) CompleteMultiPartUpload(_ context.Context, obj block.ObjectPointer, uploadID string, multipartList *block.MultipartUploadCompletion) (*block.CompleteMultiPartUploadResponse, error) { 420 if err := isValidUploadID(uploadID); err != nil { 421 return nil, err 422 } 423 etag := computeETag(multipartList.Part) + "-" + strconv.Itoa(len(multipartList.Part)) 424 partFiles, err := l.getPartFiles(uploadID, obj) 425 if err != nil { 426 return nil, fmt.Errorf("part files not found for %s: %w", uploadID, err) 427 } 428 size, err := l.unitePartFiles(obj, partFiles) 429 if err != nil { 430 return nil, fmt.Errorf("multipart upload unite for %s: %w", uploadID, err) 431 } 432 if err = l.removePartFiles(partFiles); err != nil { 433 return nil, err 434 } 435 return &block.CompleteMultiPartUploadResponse{ 436 ETag: etag, 437 ContentLength: size, 438 }, nil 439 } 440 441 func computeETag(parts []block.MultipartPart) string { 442 var etagHex []string 443 for _, p := range parts { 444 e := strings.Trim(p.ETag, `"`) 445 etagHex = append(etagHex, e) 446 } 447 s := strings.Join(etagHex, "") 448 b, _ := hex.DecodeString(s) 449 md5res := md5.Sum(b) //nolint:gosec 450 csm := hex.EncodeToString(md5res[:]) 451 return csm 452 } 453 454 func (l *Adapter) unitePartFiles(identifier block.ObjectPointer, filenames []string) (int64, error) { 455 p, err := l.extractParamsFromObj(identifier) 456 if err != nil { 457 return 0, err 458 } 459 unitedFile, err := os.Create(p) 460 if err != nil { 461 return 0, fmt.Errorf("create path %s: %w", p, err) 462 } 463 files := make([]*os.File, 0, len(filenames)) 464 defer func() { 465 _ = unitedFile.Close() 466 for _, f := range files { 467 _ = f.Close() 468 } 469 }() 470 for _, name := range filenames { 471 if err := l.verifyRelPath(name); err != nil { 472 return 0, err 473 } 474 f, err := os.Open(filepath.Clean(name)) 475 if err != nil { 476 return 0, fmt.Errorf("open file %s: %w", name, err) 477 } 478 files = append(files, f) 479 } 480 // convert slice file files to readers 481 readers := make([]io.Reader, len(files)) 482 for i := range files { 483 readers[i] = files[i] 484 } 485 unitedReader := io.MultiReader(readers...) 486 return io.Copy(unitedFile, unitedReader) 487 } 488 489 func (l *Adapter) removePartFiles(files []string) error { 490 var firstErr error 491 for _, name := range files { 492 if err := l.verifyRelPath(name); err != nil { 493 if firstErr == nil { 494 firstErr = err 495 } 496 } 497 // If removal fails prefer to skip the error: "only" wasted space. 498 _ = os.Remove(name) 499 } 500 return firstErr 501 } 502 503 func (l *Adapter) getPartFiles(uploadID string, obj block.ObjectPointer) ([]string, error) { 504 newObj := block.ObjectPointer{ 505 StorageNamespace: obj.StorageNamespace, 506 Identifier: uploadID, 507 } 508 globPathPattern, err := l.extractParamsFromObj(newObj) 509 if err != nil { 510 return nil, err 511 } 512 globPathPattern += "*" 513 names, err := filepath.Glob(globPathPattern) 514 if err != nil { 515 return nil, err 516 } 517 sort.Strings(names) 518 return names, nil 519 } 520 521 func (l *Adapter) BlockstoreType() string { 522 return block.BlockstoreTypeLocal 523 } 524 525 func (l *Adapter) GetStorageNamespaceInfo() block.StorageNamespaceInfo { 526 info := block.DefaultStorageNamespaceInfo(block.BlockstoreTypeLocal) 527 info.PreSignSupport = false 528 info.DefaultNamespacePrefix = DefaultNamespacePrefix 529 info.ImportSupport = l.importEnabled 530 return info 531 } 532 533 func (l *Adapter) ResolveNamespace(storageNamespace, key string, identifierType block.IdentifierType) (block.QualifiedKey, error) { 534 qk, err := block.DefaultResolveNamespace(storageNamespace, key, identifierType) 535 if err != nil { 536 return nil, err 537 } 538 539 // Check if path allowed and return error if path is not allowed 540 _, err = l.extractParamsFromObj(block.ObjectPointer{ 541 StorageNamespace: storageNamespace, 542 Identifier: key, 543 IdentifierType: identifierType, 544 }) 545 if err != nil { 546 return nil, err 547 } 548 549 return QualifiedKey{ 550 CommonQualifiedKey: qk, 551 path: l.path, 552 }, nil 553 } 554 555 func (l *Adapter) RuntimeStats() map[string]string { 556 return nil 557 } 558 559 func VerifyAbsPath(absPath, adapterPath string, allowedPrefixes []string) error { 560 // check we have a valid abs path 561 if !filepath.IsAbs(absPath) || filepath.Clean(absPath) != absPath { 562 return ErrBadPath 563 } 564 // point to storage namespace 565 if strings.HasPrefix(absPath, adapterPath) { 566 return nil 567 } 568 // allowed places 569 if !slices.ContainsFunc(allowedPrefixes, func(prefix string) bool { 570 return strings.HasPrefix(absPath, prefix) 571 }) { 572 return block.ErrForbidden 573 } 574 return nil 575 } 576 577 func isValidUploadID(uploadID string) error { 578 _, err := hex.DecodeString(uploadID) 579 if err != nil { 580 return fmt.Errorf("%w: %s", ErrInvalidUploadIDFormat, err) 581 } 582 return nil 583 } 584 585 func (l *Adapter) GetPresignUploadPartURL(_ context.Context, _ block.ObjectPointer, _ string, _ int) (string, error) { 586 return "", block.ErrOperationNotSupported 587 } 588 589 func (l *Adapter) ListParts(_ context.Context, _ block.ObjectPointer, _ string, _ block.ListPartsOpts) (*block.ListPartsResponse, error) { 590 return nil, block.ErrOperationNotSupported 591 }