github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/block/local/adapter.go (about)

     1  package local
     2  
     3  import (
     4  	"context"
     5  	"crypto/md5" //nolint:gosec
     6  	"encoding/hex"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"net/http"
    11  	"net/url"
    12  	"os"
    13  	"path"
    14  	"path/filepath"
    15  	"sort"
    16  	"strconv"
    17  	"strings"
    18  	"time"
    19  
    20  	"github.com/google/uuid"
    21  	"github.com/treeverse/lakefs/pkg/block"
    22  	"github.com/treeverse/lakefs/pkg/block/params"
    23  	"golang.org/x/exp/slices"
    24  )
    25  
        // DefaultNamespacePrefix is the local blockstore type followed by "://".  The adapter
        // expects storage namespaces, and absolute object identifiers, to start with it.
    26  const DefaultNamespacePrefix = block.BlockstoreTypeLocal + "://"
    27  
        // Adapter is the local block adapter: it keeps objects as files beneath a root directory.
    28  type Adapter struct {
        	// path is the cleaned root directory, set by NewAdapter.
    29  	path                    string
        	// removeEmptyDir makes Remove prune parent directories left empty by a deletion.
    30  	removeEmptyDir          bool
        	// allowedExternalPrefixes are path prefixes outside path that VerifyAbsPath accepts.
    31  	allowedExternalPrefixes []string
        	// importEnabled is handed to the local walker and reported as ImportSupport.
    32  	importEnabled           bool
    33  }
    34  
        // Sentinel errors returned (possibly wrapped) by the local adapter.
    35  var (
        	// ErrPathNotWritable is returned by NewAdapter when the adapter directory fails the
        	// writability probe.
    36  	ErrPathNotWritable       = errors.New("path provided is not writable")
        	// ErrInvalidUploadIDFormat is wrapped by isValidUploadID when an upload ID is not hex.
    37  	ErrInvalidUploadIDFormat = errors.New("invalid upload id format")
        	// ErrBadPath is returned when a path fails the adapter's path verification.
    38  	ErrBadPath               = errors.New("bad path traversal blocked")
    39  )
    40  
        // QualifiedKey is a block.CommonQualifiedKey paired with the adapter root path, which
        // Format places in front of the storage namespace and key.
    41  type QualifiedKey struct {
    42  	block.CommonQualifiedKey
        	// path is the adapter root directory (ResolveNamespace fills it from Adapter.path).
    43  	path string
    44  }
    45  
    46  func (qk QualifiedKey) Format() string {
    47  	p := path.Join(qk.path, qk.GetStorageNamespace(), qk.GetKey())
    48  	return qk.GetStorageType().Scheme() + "://" + p
    49  }
    50  
        // GetStorageType returns the storage type of the embedded CommonQualifiedKey.
    51  func (qk QualifiedKey) GetStorageType() block.StorageType {
    52  	return qk.CommonQualifiedKey.GetStorageType()
    53  }
    54  
        // GetStorageNamespace returns the storage namespace of the embedded CommonQualifiedKey.
    55  func (qk QualifiedKey) GetStorageNamespace() string {
    56  	return qk.CommonQualifiedKey.GetStorageNamespace()
    57  }
    58  
        // GetKey returns the key of the embedded CommonQualifiedKey.
    59  func (qk QualifiedKey) GetKey() string {
    60  	return qk.CommonQualifiedKey.GetKey()
    61  }
    62  
        // WithAllowedExternalPrefixes returns a NewAdapter option that sets the path prefixes
        // outside the adapter directory that absolute paths may point at.  The slice is stored
        // as given, without copying.
    63  func WithAllowedExternalPrefixes(prefixes []string) func(a *Adapter) {
    64  	return func(a *Adapter) {
    65  		a.allowedExternalPrefixes = prefixes
    66  	}
    67  }
    68  
        // WithImportEnabled returns a NewAdapter option that sets the adapter's importEnabled flag.
    69  func WithImportEnabled(b bool) func(a *Adapter) {
    70  	return func(a *Adapter) {
    71  		a.importEnabled = b
    72  	}
    73  }
    74  
        // WithRemoveEmptyDir returns a NewAdapter option that sets whether Remove prunes empty
        // parent directories.  NewAdapter enables this by default.
    75  func WithRemoveEmptyDir(b bool) func(a *Adapter) {
    76  	return func(a *Adapter) {
    77  		a.removeEmptyDir = b
    78  	}
    79  }
    80  
    81  func NewAdapter(path string, opts ...func(a *Adapter)) (*Adapter, error) {
    82  	// Clean() the path so that misconfiguration does not allow path traversal.
    83  	path = filepath.Clean(path)
    84  	err := os.MkdirAll(path, 0o700) //nolint: mnd
    85  	if err != nil {
    86  		return nil, err
    87  	}
    88  	if !isDirectoryWritable(path) {
    89  		return nil, ErrPathNotWritable
    90  	}
    91  	localAdapter := &Adapter{
    92  		path:           path,
    93  		removeEmptyDir: true,
    94  	}
    95  	for _, opt := range opts {
    96  		opt(localAdapter)
    97  	}
    98  	return localAdapter, nil
    99  }
   100  
        // GetPreSignedURL always fails: the returned error wraps block.ErrOperationNotSupported.
   101  func (l *Adapter) GetPreSignedURL(_ context.Context, _ block.ObjectPointer, _ block.PreSignMode) (string, time.Time, error) {
   102  	return "", time.Time{}, fmt.Errorf("local adapter presigned URL: %w", block.ErrOperationNotSupported)
   103  }
   104  
   105  // verifyRelPath ensures that p is under the directory controlled by this adapter.  It does not
   106  // examine the filesystem and can mistakenly error out when symbolic links are involved.
   107  func (l *Adapter) verifyRelPath(p string) error {
   108  	if !strings.HasPrefix(filepath.Clean(p), l.path) {
   109  		return fmt.Errorf("%s: %w", p, ErrBadPath)
   110  	}
   111  	return nil
   112  }
   113  
   114  func (l *Adapter) extractParamsFromObj(ptr block.ObjectPointer) (string, error) {
   115  	if strings.HasPrefix(ptr.Identifier, DefaultNamespacePrefix) {
   116  		// check abs path
   117  		p := ptr.Identifier[len(DefaultNamespacePrefix):]
   118  		if err := VerifyAbsPath(p, l.path, l.allowedExternalPrefixes); err != nil {
   119  			return "", err
   120  		}
   121  		return p, nil
   122  	}
   123  	// relative path
   124  	if !strings.HasPrefix(ptr.StorageNamespace, DefaultNamespacePrefix) {
   125  		return "", fmt.Errorf("%w: storage namespace", ErrBadPath)
   126  	}
   127  	p := path.Join(l.path, ptr.StorageNamespace[len(DefaultNamespacePrefix):], ptr.Identifier)
   128  	if err := l.verifyRelPath(p); err != nil {
   129  		return "", err
   130  	}
   131  	return p, nil
   132  }
   133  
   134  // maybeMkdir verifies path is allowed and runs f(path), but if f fails due to file-not-found
   135  // MkdirAll's its dir and then runs it again.
   136  func (l *Adapter) maybeMkdir(path string, f func(p string) (*os.File, error)) (*os.File, error) {
   137  	if err := l.verifyRelPath(path); err != nil {
   138  		return nil, err
   139  	}
   140  	ret, err := f(path)
   141  	if !errors.Is(err, os.ErrNotExist) {
   142  		return ret, err
   143  	}
   144  	d := filepath.Dir(filepath.Clean(path))
   145  	if err = os.MkdirAll(d, 0o750); err != nil { //nolint: mnd
   146  		return nil, err
   147  	}
   148  	return f(path)
   149  }
   150  
        // Path returns the adapter's root directory.
   151  func (l *Adapter) Path() string {
   152  	return l.path
   153  }
   154  
   155  func (l *Adapter) Put(_ context.Context, obj block.ObjectPointer, _ int64, reader io.Reader, _ block.PutOpts) error {
   156  	p, err := l.extractParamsFromObj(obj)
   157  	if err != nil {
   158  		return err
   159  	}
   160  	p = filepath.Clean(p)
   161  	f, err := l.maybeMkdir(p, os.Create)
   162  	if err != nil {
   163  		return err
   164  	}
   165  	defer func() {
   166  		_ = f.Close()
   167  	}()
   168  	_, err = io.Copy(f, reader)
   169  	return err
   170  }
   171  
   172  func (l *Adapter) Remove(_ context.Context, obj block.ObjectPointer) error {
   173  	p, err := l.extractParamsFromObj(obj)
   174  	if err != nil {
   175  		return err
   176  	}
   177  	p = filepath.Clean(p)
   178  	err = os.Remove(p)
   179  	if err != nil {
   180  		return err
   181  	}
   182  	if l.removeEmptyDir {
   183  		dir := filepath.Dir(p)
   184  		repoRoot := obj.StorageNamespace[len(DefaultNamespacePrefix):]
   185  		removeEmptyDirUntil(dir, path.Join(l.path, repoRoot))
   186  	}
   187  	return nil
   188  }
   189  
   190  func removeEmptyDirUntil(dir string, stopAt string) {
   191  	if stopAt == "" {
   192  		return
   193  	}
   194  	if !strings.HasSuffix(stopAt, "/") {
   195  		stopAt += "/"
   196  	}
   197  	for strings.HasPrefix(dir, stopAt) && dir != stopAt {
   198  		err := os.Remove(dir)
   199  		if err != nil {
   200  			break
   201  		}
   202  		dir = filepath.Dir(dir)
   203  		if dir == "/" {
   204  			break
   205  		}
   206  	}
   207  }
   208  
   209  func (l *Adapter) Copy(_ context.Context, sourceObj, destinationObj block.ObjectPointer) error {
   210  	source, err := l.extractParamsFromObj(sourceObj)
   211  	if err != nil {
   212  		return err
   213  	}
   214  	sourceFile, err := os.Open(filepath.Clean(source))
   215  	defer func() {
   216  		_ = sourceFile.Close()
   217  	}()
   218  	if err != nil {
   219  		return err
   220  	}
   221  	dest, err := l.extractParamsFromObj(destinationObj)
   222  	if err != nil {
   223  		return err
   224  	}
   225  	destinationFile, err := l.maybeMkdir(dest, os.Create)
   226  	if err != nil {
   227  		return err
   228  	}
   229  	defer func() {
   230  		_ = destinationFile.Close()
   231  	}()
   232  	_, err = io.Copy(destinationFile, sourceFile)
   233  	return err
   234  }
   235  
   236  func (l *Adapter) UploadCopyPart(ctx context.Context, sourceObj, destinationObj block.ObjectPointer, uploadID string, partNumber int) (*block.UploadPartResponse, error) {
   237  	if err := isValidUploadID(uploadID); err != nil {
   238  		return nil, err
   239  	}
   240  	r, err := l.Get(ctx, sourceObj, 0)
   241  	if err != nil {
   242  		return nil, fmt.Errorf("copy get: %w", err)
   243  	}
   244  	md5Read := block.NewHashingReader(r, block.HashFunctionMD5)
   245  	fName := uploadID + fmt.Sprintf("-%05d", partNumber)
   246  	err = l.Put(ctx, block.ObjectPointer{StorageNamespace: destinationObj.StorageNamespace, Identifier: fName}, -1, md5Read, block.PutOpts{})
   247  	if err != nil {
   248  		return nil, fmt.Errorf("copy put: %w", err)
   249  	}
   250  	etag := hex.EncodeToString(md5Read.Md5.Sum(nil))
   251  	return &block.UploadPartResponse{
   252  		ETag: etag,
   253  	}, nil
   254  }
   255  
   256  func (l *Adapter) UploadCopyPartRange(ctx context.Context, sourceObj, destinationObj block.ObjectPointer, uploadID string, partNumber int, startPosition, endPosition int64) (*block.UploadPartResponse, error) {
   257  	if err := isValidUploadID(uploadID); err != nil {
   258  		return nil, err
   259  	}
   260  	r, err := l.GetRange(ctx, sourceObj, startPosition, endPosition)
   261  	if err != nil {
   262  		return nil, fmt.Errorf("copy range get: %w", err)
   263  	}
   264  	md5Read := block.NewHashingReader(r, block.HashFunctionMD5)
   265  	fName := uploadID + fmt.Sprintf("-%05d", partNumber)
   266  	err = l.Put(ctx, block.ObjectPointer{StorageNamespace: destinationObj.StorageNamespace, Identifier: fName}, -1, md5Read, block.PutOpts{})
   267  	if err != nil {
   268  		return nil, fmt.Errorf("copy range put: %w", err)
   269  	}
   270  	etag := hex.EncodeToString(md5Read.Md5.Sum(nil))
   271  	return &block.UploadPartResponse{
   272  		ETag: etag,
   273  	}, err
   274  }
   275  
   276  func (l *Adapter) Get(_ context.Context, obj block.ObjectPointer, _ int64) (reader io.ReadCloser, err error) {
   277  	p, err := l.extractParamsFromObj(obj)
   278  	if err != nil {
   279  		return nil, err
   280  	}
   281  	f, err := os.OpenFile(filepath.Clean(p), os.O_RDONLY, 0o600) //nolint: mnd
   282  	if os.IsNotExist(err) {
   283  		return nil, block.ErrDataNotFound
   284  	}
   285  	if err != nil {
   286  		return nil, err
   287  	}
   288  	return f, nil
   289  }
   290  
   291  func (l *Adapter) GetWalker(uri *url.URL) (block.Walker, error) {
   292  	if err := block.ValidateStorageType(uri, block.StorageTypeLocal); err != nil {
   293  		return nil, err
   294  	}
   295  
   296  	err := VerifyAbsPath(uri.Path, l.path, l.allowedExternalPrefixes)
   297  	if err != nil {
   298  		return nil, err
   299  	}
   300  	return NewLocalWalker(params.Local{
   301  		Path:                    l.path,
   302  		ImportEnabled:           l.importEnabled,
   303  		AllowedExternalPrefixes: l.allowedExternalPrefixes,
   304  	}), nil
   305  }
   306  
   307  func (l *Adapter) Exists(_ context.Context, obj block.ObjectPointer) (bool, error) {
   308  	p, err := l.extractParamsFromObj(obj)
   309  	if err != nil {
   310  		return false, err
   311  	}
   312  	_, err = os.Stat(p)
   313  	if err != nil {
   314  		if os.IsNotExist(err) {
   315  			return false, nil
   316  		}
   317  		return false, err
   318  	}
   319  	return true, nil
   320  }
   321  
   322  func (l *Adapter) GetRange(_ context.Context, obj block.ObjectPointer, start int64, end int64) (io.ReadCloser, error) {
   323  	if start < 0 || end < start {
   324  		return nil, block.ErrBadIndex
   325  	}
   326  	p, err := l.extractParamsFromObj(obj)
   327  	if err != nil {
   328  		return nil, err
   329  	}
   330  	f, err := os.Open(filepath.Clean(p))
   331  	if err != nil {
   332  		if os.IsNotExist(err) {
   333  			return nil, block.ErrDataNotFound
   334  		}
   335  		return nil, err
   336  	}
   337  	return &struct {
   338  		io.Reader
   339  		io.Closer
   340  	}{
   341  		Reader: io.NewSectionReader(f, start, end-start+1),
   342  		Closer: f,
   343  	}, nil
   344  }
   345  
   346  func (l *Adapter) GetProperties(_ context.Context, obj block.ObjectPointer) (block.Properties, error) {
   347  	p, err := l.extractParamsFromObj(obj)
   348  	if err != nil {
   349  		return block.Properties{}, err
   350  	}
   351  	_, err = os.Stat(p)
   352  	if err != nil {
   353  		return block.Properties{}, err
   354  	}
   355  	// No properties, just return that it exists
   356  	return block.Properties{}, nil
   357  }
   358  
   359  // isDirectoryWritable tests that pth, which must not be controllable by user input, is a
   360  // writable directory.  As there is no simple way to test this in windows, I prefer the "brute
   361  // force" method of creating s dummy file.  Will work in any OS.  speed is not an issue, as
   362  // this will be activated very few times during startup.
   363  func isDirectoryWritable(pth string) bool {
   364  	f, err := os.CreateTemp(pth, "dummy")
   365  	if err != nil {
   366  		return false
   367  	}
   368  	_ = f.Close()
   369  	_ = os.Remove(f.Name())
   370  	return true
   371  }
   372  
   373  func (l *Adapter) CreateMultiPartUpload(_ context.Context, obj block.ObjectPointer, _ *http.Request, _ block.CreateMultiPartUploadOpts) (*block.CreateMultiPartUploadResponse, error) {
   374  	if strings.Contains(obj.Identifier, "/") {
   375  		fullPath, err := l.extractParamsFromObj(obj)
   376  		if err != nil {
   377  			return nil, err
   378  		}
   379  		fullDir := path.Dir(fullPath)
   380  		err = os.MkdirAll(fullDir, 0o750) //nolint: mnd
   381  		if err != nil {
   382  			return nil, err
   383  		}
   384  	}
   385  	uidBytes := uuid.New()
   386  	uploadID := hex.EncodeToString(uidBytes[:])
   387  	return &block.CreateMultiPartUploadResponse{
   388  		UploadID: uploadID,
   389  	}, nil
   390  }
   391  
   392  func (l *Adapter) UploadPart(ctx context.Context, obj block.ObjectPointer, _ int64, reader io.Reader, uploadID string, partNumber int) (*block.UploadPartResponse, error) {
   393  	if err := isValidUploadID(uploadID); err != nil {
   394  		return nil, err
   395  	}
   396  	md5Read := block.NewHashingReader(reader, block.HashFunctionMD5)
   397  	fName := uploadID + fmt.Sprintf("-%05d", partNumber)
   398  	err := l.Put(ctx, block.ObjectPointer{StorageNamespace: obj.StorageNamespace, Identifier: fName}, -1, md5Read, block.PutOpts{})
   399  	etag := hex.EncodeToString(md5Read.Md5.Sum(nil))
   400  	return &block.UploadPartResponse{
   401  		ETag: etag,
   402  	}, err
   403  }
   404  
   405  func (l *Adapter) AbortMultiPartUpload(_ context.Context, obj block.ObjectPointer, uploadID string) error {
   406  	if err := isValidUploadID(uploadID); err != nil {
   407  		return err
   408  	}
   409  	files, err := l.getPartFiles(uploadID, obj)
   410  	if err != nil {
   411  		return err
   412  	}
   413  	if err = l.removePartFiles(files); err != nil {
   414  		return err
   415  	}
   416  	return nil
   417  }
   418  
   419  func (l *Adapter) CompleteMultiPartUpload(_ context.Context, obj block.ObjectPointer, uploadID string, multipartList *block.MultipartUploadCompletion) (*block.CompleteMultiPartUploadResponse, error) {
   420  	if err := isValidUploadID(uploadID); err != nil {
   421  		return nil, err
   422  	}
   423  	etag := computeETag(multipartList.Part) + "-" + strconv.Itoa(len(multipartList.Part))
   424  	partFiles, err := l.getPartFiles(uploadID, obj)
   425  	if err != nil {
   426  		return nil, fmt.Errorf("part files not found for %s: %w", uploadID, err)
   427  	}
   428  	size, err := l.unitePartFiles(obj, partFiles)
   429  	if err != nil {
   430  		return nil, fmt.Errorf("multipart upload unite for %s: %w", uploadID, err)
   431  	}
   432  	if err = l.removePartFiles(partFiles); err != nil {
   433  		return nil, err
   434  	}
   435  	return &block.CompleteMultiPartUploadResponse{
   436  		ETag:          etag,
   437  		ContentLength: size,
   438  	}, nil
   439  }
   440  
   441  func computeETag(parts []block.MultipartPart) string {
   442  	var etagHex []string
   443  	for _, p := range parts {
   444  		e := strings.Trim(p.ETag, `"`)
   445  		etagHex = append(etagHex, e)
   446  	}
   447  	s := strings.Join(etagHex, "")
   448  	b, _ := hex.DecodeString(s)
   449  	md5res := md5.Sum(b) //nolint:gosec
   450  	csm := hex.EncodeToString(md5res[:])
   451  	return csm
   452  }
   453  
   454  func (l *Adapter) unitePartFiles(identifier block.ObjectPointer, filenames []string) (int64, error) {
   455  	p, err := l.extractParamsFromObj(identifier)
   456  	if err != nil {
   457  		return 0, err
   458  	}
   459  	unitedFile, err := os.Create(p)
   460  	if err != nil {
   461  		return 0, fmt.Errorf("create path %s: %w", p, err)
   462  	}
   463  	files := make([]*os.File, 0, len(filenames))
   464  	defer func() {
   465  		_ = unitedFile.Close()
   466  		for _, f := range files {
   467  			_ = f.Close()
   468  		}
   469  	}()
   470  	for _, name := range filenames {
   471  		if err := l.verifyRelPath(name); err != nil {
   472  			return 0, err
   473  		}
   474  		f, err := os.Open(filepath.Clean(name))
   475  		if err != nil {
   476  			return 0, fmt.Errorf("open file %s: %w", name, err)
   477  		}
   478  		files = append(files, f)
   479  	}
   480  	// convert slice file files to readers
   481  	readers := make([]io.Reader, len(files))
   482  	for i := range files {
   483  		readers[i] = files[i]
   484  	}
   485  	unitedReader := io.MultiReader(readers...)
   486  	return io.Copy(unitedFile, unitedReader)
   487  }
   488  
   489  func (l *Adapter) removePartFiles(files []string) error {
   490  	var firstErr error
   491  	for _, name := range files {
   492  		if err := l.verifyRelPath(name); err != nil {
   493  			if firstErr == nil {
   494  				firstErr = err
   495  			}
   496  		}
   497  		// If removal fails prefer to skip the error: "only" wasted space.
   498  		_ = os.Remove(name)
   499  	}
   500  	return firstErr
   501  }
   502  
   503  func (l *Adapter) getPartFiles(uploadID string, obj block.ObjectPointer) ([]string, error) {
   504  	newObj := block.ObjectPointer{
   505  		StorageNamespace: obj.StorageNamespace,
   506  		Identifier:       uploadID,
   507  	}
   508  	globPathPattern, err := l.extractParamsFromObj(newObj)
   509  	if err != nil {
   510  		return nil, err
   511  	}
   512  	globPathPattern += "*"
   513  	names, err := filepath.Glob(globPathPattern)
   514  	if err != nil {
   515  		return nil, err
   516  	}
   517  	sort.Strings(names)
   518  	return names, nil
   519  }
   520  
        // BlockstoreType returns block.BlockstoreTypeLocal.
   521  func (l *Adapter) BlockstoreType() string {
   522  	return block.BlockstoreTypeLocal
   523  }
   524  
   525  func (l *Adapter) GetStorageNamespaceInfo() block.StorageNamespaceInfo {
   526  	info := block.DefaultStorageNamespaceInfo(block.BlockstoreTypeLocal)
   527  	info.PreSignSupport = false
   528  	info.DefaultNamespacePrefix = DefaultNamespacePrefix
   529  	info.ImportSupport = l.importEnabled
   530  	return info
   531  }
   532  
   533  func (l *Adapter) ResolveNamespace(storageNamespace, key string, identifierType block.IdentifierType) (block.QualifiedKey, error) {
   534  	qk, err := block.DefaultResolveNamespace(storageNamespace, key, identifierType)
   535  	if err != nil {
   536  		return nil, err
   537  	}
   538  
   539  	// Check if path allowed and return error if path is not allowed
   540  	_, err = l.extractParamsFromObj(block.ObjectPointer{
   541  		StorageNamespace: storageNamespace,
   542  		Identifier:       key,
   543  		IdentifierType:   identifierType,
   544  	})
   545  	if err != nil {
   546  		return nil, err
   547  	}
   548  
   549  	return QualifiedKey{
   550  		CommonQualifiedKey: qk,
   551  		path:               l.path,
   552  	}, nil
   553  }
   554  
        // RuntimeStats returns nil: this adapter reports no runtime statistics.
   555  func (l *Adapter) RuntimeStats() map[string]string {
   556  	return nil
   557  }
   558  
   559  func VerifyAbsPath(absPath, adapterPath string, allowedPrefixes []string) error {
   560  	// check we have a valid abs path
   561  	if !filepath.IsAbs(absPath) || filepath.Clean(absPath) != absPath {
   562  		return ErrBadPath
   563  	}
   564  	// point to storage namespace
   565  	if strings.HasPrefix(absPath, adapterPath) {
   566  		return nil
   567  	}
   568  	// allowed places
   569  	if !slices.ContainsFunc(allowedPrefixes, func(prefix string) bool {
   570  		return strings.HasPrefix(absPath, prefix)
   571  	}) {
   572  		return block.ErrForbidden
   573  	}
   574  	return nil
   575  }
   576  
   577  func isValidUploadID(uploadID string) error {
   578  	_, err := hex.DecodeString(uploadID)
   579  	if err != nil {
   580  		return fmt.Errorf("%w: %s", ErrInvalidUploadIDFormat, err)
   581  	}
   582  	return nil
   583  }
   584  
        // GetPresignUploadPartURL always returns block.ErrOperationNotSupported.
   585  func (l *Adapter) GetPresignUploadPartURL(_ context.Context, _ block.ObjectPointer, _ string, _ int) (string, error) {
   586  	return "", block.ErrOperationNotSupported
   587  }
   588  
        // ListParts always returns block.ErrOperationNotSupported.
   589  func (l *Adapter) ListParts(_ context.Context, _ block.ObjectPointer, _ string, _ block.ListPartsOpts) (*block.ListPartsResponse, error) {
   590  	return nil, block.ErrOperationNotSupported
   591  }