github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/gateway/operations/listobjects.go (about)

     1  package operations
     2  
     3  import (
     4  	"errors"
     5  	"net/http"
     6  	"strconv"
     7  	"strings"
     8  
     9  	"github.com/treeverse/lakefs/pkg/catalog"
    10  	gatewayerrors "github.com/treeverse/lakefs/pkg/gateway/errors"
    11  	"github.com/treeverse/lakefs/pkg/gateway/path"
    12  	"github.com/treeverse/lakefs/pkg/gateway/serde"
    13  	"github.com/treeverse/lakefs/pkg/graveler"
    14  	"github.com/treeverse/lakefs/pkg/httputil"
    15  	"github.com/treeverse/lakefs/pkg/logging"
    16  	"github.com/treeverse/lakefs/pkg/permissions"
    17  )
    18  
    19  const (
    20  	ListObjectMaxKeys = 1000
    21  
    22  	// defaultBucketLocation used to identify if we need to specify the location constraint
    23  	defaultBucketLocation = "us-east-1"
    24  )
    25  
    26  type ListObjects struct{}
    27  
    28  func (controller *ListObjects) RequiredPermissions(req *http.Request, repoID string) (permissions.Node, error) {
    29  	// check if we're listing files in a branch, or listing branches
    30  	params := req.URL.Query()
    31  	delimiter := params.Get("delimiter")
    32  	prefix := params.Get("prefix")
    33  	if delimiter == "/" && !strings.Contains(prefix, "/") {
    34  		return permissions.Node{
    35  			Permission: permissions.Permission{
    36  				Action:   permissions.ListBranchesAction,
    37  				Resource: permissions.RepoArn(repoID),
    38  			},
    39  		}, nil
    40  	}
    41  
    42  	// otherwise, we're listing objects within a branch
    43  	return permissions.Node{
    44  		Permission: permissions.Permission{
    45  			Action:   permissions.ListObjectsAction,
    46  			Resource: permissions.RepoArn(repoID),
    47  		},
    48  	}, nil
    49  }
    50  
    51  func (controller *ListObjects) getMaxKeys(req *http.Request, _ *RepoOperation) int {
    52  	params := req.URL.Query()
    53  	maxKeys := ListObjectMaxKeys
    54  	maxKeysParam := params.Get("max-keys")
    55  	if len(maxKeysParam) > 0 {
    56  		parsedKeys, err := strconv.Atoi(maxKeysParam)
    57  		if err == nil {
    58  			maxKeys = parsedKeys
    59  		}
    60  	}
    61  	return maxKeys
    62  }
    63  
    64  func (controller *ListObjects) serializeEntries(ref string, entries []*catalog.DBEntry) ([]serde.CommonPrefixes, []serde.Contents, string) {
    65  	dirs := make([]serde.CommonPrefixes, 0)
    66  	files := make([]serde.Contents, 0)
    67  	var lastKey string
    68  	for _, entry := range entries {
    69  		lastKey = entry.Path
    70  		if entry.CommonLevel {
    71  			dirs = append(dirs, serde.CommonPrefixes{Prefix: path.WithRef(entry.Path, ref)})
    72  		} else {
    73  			files = append(files, serde.Contents{
    74  				Key:          path.WithRef(entry.Path, ref),
    75  				LastModified: serde.Timestamp(entry.CreationDate),
    76  				ETag:         httputil.ETag(entry.Checksum),
    77  				Size:         entry.Size,
    78  				StorageClass: "STANDARD",
    79  			})
    80  		}
    81  	}
    82  	return dirs, files, lastKey
    83  }
    84  
    85  func (controller *ListObjects) serializeBranches(branches []*catalog.Branch) ([]serde.CommonPrefixes, string) {
    86  	dirs := make([]serde.CommonPrefixes, 0)
    87  	var lastKey string
    88  	for _, branch := range branches {
    89  		lastKey = branch.Name
    90  		dirs = append(dirs, serde.CommonPrefixes{Prefix: path.WithRef("", branch.Name)})
    91  	}
    92  	return dirs, lastKey
    93  }
    94  
    95  func (controller *ListObjects) ListV2(w http.ResponseWriter, req *http.Request, o *RepoOperation) {
    96  	req = req.WithContext(logging.AddFields(req.Context(), logging.Fields{
    97  		logging.ListTypeFieldKey: "v2",
    98  	}))
    99  	params := req.URL.Query()
   100  	delimiter := params.Get("delimiter")
   101  	startAfter := params.Get("start-after")
   102  	continuationToken := params.Get("continuation-token")
   103  
   104  	// resolve "from"
   105  	var fromStr string
   106  	if len(startAfter) > 0 {
   107  		fromStr = startAfter
   108  	}
   109  	if len(continuationToken) > 0 {
   110  		// take this instead
   111  		fromStr = continuationToken
   112  	}
   113  
   114  	maxKeys := controller.getMaxKeys(req, o)
   115  
   116  	var results []*catalog.DBEntry
   117  	var hasMore bool
   118  	var ref string
   119  	// should we list branches?
   120  	prefix, err := path.ResolvePath(params.Get("prefix"))
   121  	if err != nil {
   122  		o.Log(req).
   123  			WithError(err).
   124  			WithField("path", params.Get("prefix")).
   125  			Error("could not resolve path for prefix")
   126  		_ = o.EncodeError(w, req, err, gatewayerrors.Codes.ToAPIErr(gatewayerrors.ErrBadRequest))
   127  		return
   128  	}
   129  
   130  	var from path.ResolvedPath
   131  	if !prefix.WithPath {
   132  		// list branches then.
   133  		branchPrefix := prefix.Ref // TODO: same prefix logic also in V1!!!!!
   134  		o.Log(req).WithField("prefix", branchPrefix).Debug("listing branches with prefix")
   135  		branches, hasMore, err := o.Catalog.ListBranches(req.Context(), o.Repository.Name, branchPrefix, maxKeys, fromStr)
   136  		if err != nil {
   137  			o.Log(req).WithError(err).Error("could not list branches")
   138  			_ = o.EncodeError(w, req, err, gatewayerrors.Codes.ToAPIErr(gatewayerrors.ErrInternalError))
   139  			return
   140  		}
   141  		// return branch response
   142  		dirs, lastKey := controller.serializeBranches(branches)
   143  		resp := serde.ListObjectsV2Output{
   144  			Name:           o.Repository.Name,
   145  			Prefix:         params.Get("prefix"),
   146  			Delimiter:      delimiter,
   147  			KeyCount:       len(dirs),
   148  			MaxKeys:        maxKeys,
   149  			CommonPrefixes: dirs,
   150  			Contents:       make([]serde.Contents, 0),
   151  		}
   152  
   153  		if len(continuationToken) > 0 && strings.EqualFold(continuationToken, fromStr) {
   154  			resp.ContinuationToken = continuationToken
   155  		}
   156  
   157  		if hasMore {
   158  			resp.IsTruncated = true
   159  			resp.NextContinuationToken = lastKey
   160  		}
   161  
   162  		o.EncodeResponse(w, req, resp, http.StatusOK)
   163  		return
   164  	} else {
   165  		// list objects then.
   166  		ref = prefix.Ref
   167  		if len(fromStr) > 0 {
   168  			from, err = path.ResolvePath(fromStr)
   169  			if err != nil || !strings.EqualFold(from.Ref, prefix.Ref) {
   170  				o.Log(req).WithError(err).WithFields(logging.Fields{
   171  					"branch": prefix.Ref,
   172  					"path":   prefix.Path,
   173  					"from":   fromStr,
   174  				}).Error("invalid marker - doesnt start with branch name")
   175  				_ = o.EncodeError(w, req, err, gatewayerrors.Codes.ToAPIErr(gatewayerrors.ErrBadRequest))
   176  				return
   177  			}
   178  		}
   179  
   180  		results, hasMore, err = o.Catalog.ListEntries(
   181  			req.Context(),
   182  			o.Repository.Name,
   183  			prefix.Ref,
   184  			prefix.Path,
   185  			from.Path,
   186  			delimiter,
   187  			maxKeys,
   188  		)
   189  		log := o.Log(req).WithError(err).WithFields(logging.Fields{
   190  			"ref":  prefix.Ref,
   191  			"path": prefix.Path,
   192  		})
   193  		if errors.Is(err, graveler.ErrBranchNotFound) {
   194  			log.Debug("could not list objects in path")
   195  		} else if err != nil {
   196  			log.Error("could not list objects in path")
   197  			_ = o.EncodeError(w, req, err, gatewayerrors.Codes.ToAPIErr(gatewayerrors.ErrBadRequest))
   198  			return
   199  		}
   200  	}
   201  
   202  	dirs, files, lastKey := controller.serializeEntries(ref, results)
   203  	resp := serde.ListObjectsV2Output{
   204  		Name:           o.Repository.Name,
   205  		Prefix:         params.Get("prefix"),
   206  		Delimiter:      delimiter,
   207  		KeyCount:       len(results),
   208  		MaxKeys:        maxKeys,
   209  		CommonPrefixes: dirs,
   210  		Contents:       files,
   211  	}
   212  
   213  	if len(continuationToken) > 0 && strings.EqualFold(continuationToken, fromStr) {
   214  		resp.ContinuationToken = continuationToken
   215  	}
   216  
   217  	if hasMore {
   218  		resp.IsTruncated = true
   219  		resp.NextContinuationToken = path.WithRef(lastKey, ref)
   220  	}
   221  
   222  	o.EncodeResponse(w, req, resp, http.StatusOK)
   223  }
   224  
   225  func (controller *ListObjects) ListV1(w http.ResponseWriter, req *http.Request, o *RepoOperation) {
   226  	req = req.WithContext(logging.AddFields(req.Context(), logging.Fields{
   227  		logging.ListTypeFieldKey: "v1",
   228  	}))
   229  	// handle ListObjects (v1)
   230  	params := req.URL.Query()
   231  	delimiter := params.Get("delimiter")
   232  	descend := true
   233  	if len(delimiter) >= 1 {
   234  		descend = false
   235  	}
   236  
   237  	maxKeys := controller.getMaxKeys(req, o)
   238  
   239  	var results []*catalog.DBEntry
   240  	hasMore := false
   241  
   242  	var ref string
   243  	// should we list branches?
   244  	prefix, err := path.ResolvePath(params.Get("prefix"))
   245  	if err != nil {
   246  		o.Log(req).
   247  			WithError(err).
   248  			WithField("path", params.Get("prefix")).
   249  			Error("could not resolve path for prefix")
   250  		_ = o.EncodeError(w, req, err, gatewayerrors.Codes.ToAPIErr(gatewayerrors.ErrBadRequest))
   251  		return
   252  	}
   253  
   254  	if !prefix.WithPath {
   255  		// list branches then.
   256  		branches, hasMore, err := o.Catalog.ListBranches(req.Context(), o.Repository.Name, prefix.Ref, maxKeys, params.Get("marker"))
   257  		if err != nil {
   258  			// TODO incorrect error type
   259  			o.Log(req).WithError(err).Error("could not list branches")
   260  			_ = o.EncodeError(w, req, err, gatewayerrors.Codes.ToAPIErr(gatewayerrors.ErrBadRequest))
   261  			return
   262  		}
   263  		// return branch response
   264  		dirs, lastKey := controller.serializeBranches(branches)
   265  		resp := serde.ListBucketResult{
   266  			Name:           o.Repository.Name,
   267  			Prefix:         params.Get("prefix"),
   268  			Delimiter:      delimiter,
   269  			Marker:         params.Get("marker"),
   270  			KeyCount:       len(results),
   271  			MaxKeys:        maxKeys,
   272  			CommonPrefixes: dirs,
   273  			Contents:       make([]serde.Contents, 0),
   274  		}
   275  
   276  		if hasMore {
   277  			resp.IsTruncated = true
   278  			if !descend {
   279  				// NextMarker is only set if a delimiter exists
   280  				resp.NextMarker = lastKey
   281  			}
   282  		}
   283  
   284  		o.EncodeResponse(w, req, resp, http.StatusOK)
   285  		return
   286  	} else {
   287  		prefix, err := path.ResolvePath(params.Get("prefix"))
   288  		if err != nil {
   289  			o.Log(req).WithError(err).Error("could not list branches")
   290  			_ = o.EncodeError(w, req, err, gatewayerrors.Codes.ToAPIErr(gatewayerrors.ErrBadRequest))
   291  			return
   292  		}
   293  		ref = prefix.Ref
   294  		// see if we have a continuation token in the request to pick up from
   295  		var marker path.ResolvedPath
   296  		// strip the branch from the marker
   297  		if len(params.Get("marker")) > 0 {
   298  			marker, err = path.ResolvePath(params.Get("marker"))
   299  			if err != nil || !strings.EqualFold(marker.Ref, prefix.Ref) {
   300  				o.Log(req).WithError(err).WithFields(logging.Fields{
   301  					"branch": prefix.Ref,
   302  					"path":   prefix.Path,
   303  					"marker": marker,
   304  				}).Error("invalid marker - doesnt start with branch name")
   305  				_ = o.EncodeError(w, req, err, gatewayerrors.Codes.ToAPIErr(gatewayerrors.ErrBadRequest))
   306  				return
   307  			}
   308  		}
   309  		results, hasMore, err = o.Catalog.ListEntries(
   310  			req.Context(),
   311  			o.Repository.Name,
   312  			prefix.Ref,
   313  			prefix.Path,
   314  			marker.Path,
   315  			delimiter,
   316  			maxKeys,
   317  		)
   318  		if errors.Is(err, graveler.ErrNotFound) {
   319  			results = make([]*catalog.DBEntry, 0) // no results found
   320  		} else if err != nil {
   321  			o.Log(req).WithError(err).WithFields(logging.Fields{
   322  				"branch": prefix.Ref,
   323  				"path":   prefix.Path,
   324  			}).Error("could not list objects in path")
   325  			_ = o.EncodeError(w, req, err, gatewayerrors.Codes.ToAPIErr(gatewayerrors.ErrBadRequest))
   326  			return
   327  		}
   328  	}
   329  
   330  	// build a response
   331  	dirs, files, lastKey := controller.serializeEntries(ref, results)
   332  	resp := serde.ListBucketResult{
   333  		Name:           o.Repository.Name,
   334  		Prefix:         params.Get("prefix"),
   335  		Delimiter:      delimiter,
   336  		Marker:         params.Get("marker"),
   337  		KeyCount:       len(results),
   338  		MaxKeys:        maxKeys,
   339  		CommonPrefixes: dirs,
   340  		Contents:       files,
   341  	}
   342  
   343  	if hasMore {
   344  		resp.IsTruncated = true
   345  		if !descend {
   346  			// NextMarker is only set if a delimiter exists
   347  			resp.NextMarker = path.WithRef(lastKey, ref)
   348  		}
   349  	}
   350  
   351  	o.EncodeResponse(w, req, resp, http.StatusOK)
   352  }
   353  
   354  func (controller *ListObjects) Handle(w http.ResponseWriter, req *http.Request, o *RepoOperation) {
   355  	if o.HandleUnsupported(w, req, "inventory", "metrics", "publicAccessBlock", "ownershipControls",
   356  		"intelligent-tiering", "analytics", "policy", "lifecycle", "encryption", "object-lock", "replication",
   357  		"notification", "events", "acl", "cors", "website", "accelerate",
   358  		"requestPayment", "logging", "tagging", "uploads", "versions", "policyStatus") {
   359  		return
   360  	}
   361  	query := req.URL.Query()
   362  
   363  	// getbucketlocation support
   364  	if query.Has("location") {
   365  		o.Incr("get_bucket_location", o.Principal, o.Repository.Name, "")
   366  		response := serde.LocationResponse{}
   367  		if o.Region != "" && o.Region != defaultBucketLocation {
   368  			response.Location = o.Region
   369  		}
   370  		o.EncodeResponse(w, req, response, http.StatusOK)
   371  		return
   372  	}
   373  
   374  	// getbucketversioing support
   375  	if query.Has("versioning") {
   376  		o.EncodeXMLBytes(w, req, []byte(serde.VersioningResponse), http.StatusOK)
   377  		return
   378  	}
   379  	o.Incr("list_objects", o.Principal, o.Repository.Name, "")
   380  
   381  	// parse request parameters
   382  	// GET /example?list-type=2&prefix=main%2F&delimiter=%2F&encoding-type=url HTTP/1.1
   383  
   384  	// handle ListObjects versions
   385  	listType := query.Get("list-type")
   386  	switch listType {
   387  	case "", "1":
   388  		controller.ListV1(w, req, o)
   389  	case "2":
   390  		controller.ListV2(w, req, o)
   391  	default:
   392  		o.Log(req).WithField("list-type", listType).Error("listObjects version not supported")
   393  		_ = o.EncodeError(w, req, nil, gatewayerrors.Codes.ToAPIErr(gatewayerrors.ErrBadRequest))
   394  	}
   395  }