github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/s3-zip-handlers.go (about)

     1  // Copyright (c) 2015-2021 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package cmd
    19  
    20  import (
    21  	"bytes"
    22  	"context"
    23  	"errors"
    24  	"io"
    25  	"net/http"
    26  	"sort"
    27  	"strings"
    28  
    29  	"github.com/minio/minio/internal/auth"
    30  	"github.com/minio/minio/internal/crypto"
    31  	xhttp "github.com/minio/minio/internal/http"
    32  	xioutil "github.com/minio/minio/internal/ioutil"
    33  	"github.com/minio/pkg/v2/policy"
    34  	"github.com/minio/zipindex"
    35  )
    36  
// Constants used to address entries inside zip objects and to record the
// serialized zip index in the object's internal metadata.
const (
	// Values stored under archiveTypeMetadataKey: archiveType when the index is
	// stored as serialized, archiveTypeEnc when it went through opts.EncryptFn.
	archiveType            = "zip"
	archiveTypeEnc         = "zip-enc"
	archiveExt             = "." + archiveType // ".zip"
	archiveSeparator       = "/"
	archivePattern         = archiveExt + archiveSeparator                // ".zip/"
	archiveTypeMetadataKey = ReservedMetadataPrefixLower + "archive-type" // "x-minio-internal-archive-type"
	archiveInfoMetadataKey = ReservedMetadataPrefixLower + "archive-info" // "x-minio-internal-archive-info"

	// Peek into a zip archive.
	// NOTE(review): not referenced in this file; presumably the name of the
	// request header that enables zip extraction - confirm against the callers.
	xMinIOExtract = "x-minio-extract"
)
    49  
    50  // splitZipExtensionPath splits the S3 path to the zip file and the path inside the zip:
    51  //
    52  //	e.g  /path/to/archive.zip/backup-2021/myimage.png => /path/to/archive.zip, backup/myimage.png
    53  func splitZipExtensionPath(input string) (zipPath, object string, err error) {
    54  	idx := strings.Index(input, archivePattern)
    55  	if idx < 0 {
    56  		// Should never happen
    57  		return "", "", errors.New("unable to parse zip path")
    58  	}
    59  	return input[:idx+len(archivePattern)-1], input[idx+len(archivePattern):], nil
    60  }
    61  
    62  // getObjectInArchiveFileHandler - GET Object in the archive file
    63  func (api objectAPIHandlers) getObjectInArchiveFileHandler(ctx context.Context, objectAPI ObjectLayer, bucket, object string, w http.ResponseWriter, r *http.Request) {
    64  	if crypto.S3.IsRequested(r.Header) || crypto.S3KMS.IsRequested(r.Header) { // If SSE-S3 or SSE-KMS present -> AWS fails with undefined error
    65  		writeErrorResponse(ctx, w, errorCodes.ToAPIErr(ErrBadRequest), r.URL)
    66  		return
    67  	}
    68  
    69  	zipPath, object, err := splitZipExtensionPath(object)
    70  	if err != nil {
    71  		writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
    72  		return
    73  	}
    74  
    75  	opts, err := getOpts(ctx, r, bucket, zipPath)
    76  	if err != nil {
    77  		writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
    78  		return
    79  	}
    80  
    81  	getObjectInfo := objectAPI.GetObjectInfo
    82  
    83  	// Check for auth type to return S3 compatible error.
    84  	// type to return the correct error (NoSuchKey vs AccessDenied)
    85  	if s3Error := checkRequestAuthType(ctx, r, policy.GetObjectAction, bucket, zipPath); s3Error != ErrNone {
    86  		if getRequestAuthType(r) == authTypeAnonymous {
    87  			// As per "Permission" section in
    88  			// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectGET.html
    89  			// If the object you request does not exist,
    90  			// the error Amazon S3 returns depends on
    91  			// whether you also have the s3:ListBucket
    92  			// permission.
    93  			// * If you have the s3:ListBucket permission
    94  			//   on the bucket, Amazon S3 will return an
    95  			//   HTTP status code 404 ("no such key")
    96  			//   error.
    97  			// * if you don’t have the s3:ListBucket
    98  			//   permission, Amazon S3 will return an HTTP
    99  			//   status code 403 ("access denied") error.`
   100  			if globalPolicySys.IsAllowed(policy.BucketPolicyArgs{
   101  				Action:          policy.ListBucketAction,
   102  				BucketName:      bucket,
   103  				ConditionValues: getConditionValues(r, "", auth.AnonymousCredentials),
   104  				IsOwner:         false,
   105  			}) {
   106  				_, err = getObjectInfo(ctx, bucket, zipPath, opts)
   107  				if toAPIError(ctx, err).Code == "NoSuchKey" {
   108  					s3Error = ErrNoSuchKey
   109  				}
   110  			}
   111  		}
   112  		writeErrorResponse(ctx, w, errorCodes.ToAPIErr(s3Error), r.URL)
   113  		return
   114  	}
   115  
   116  	// We do not allow offsetting into extracted files.
   117  	if opts.PartNumber != 0 {
   118  		writeErrorResponse(ctx, w, errorCodes.ToAPIErr(ErrInvalidPartNumber), r.URL)
   119  		return
   120  	}
   121  
   122  	if r.Header.Get(xhttp.Range) != "" {
   123  		writeErrorResponse(ctx, w, errorCodes.ToAPIErr(ErrInvalidRange), r.URL)
   124  		return
   125  	}
   126  
   127  	// Validate pre-conditions if any.
   128  	opts.CheckPrecondFn = func(oi ObjectInfo) bool {
   129  		if _, err := DecryptObjectInfo(&oi, r); err != nil {
   130  			writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
   131  			return true
   132  		}
   133  
   134  		return checkPreconditions(ctx, w, r, oi, opts)
   135  	}
   136  
   137  	zipObjInfo, err := getObjectInfo(ctx, bucket, zipPath, opts)
   138  	if err != nil {
   139  		writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
   140  		return
   141  	}
   142  
   143  	zipInfo := zipObjInfo.ArchiveInfo()
   144  	if len(zipInfo) == 0 {
   145  		opts.EncryptFn, err = zipObjInfo.metadataEncryptFn(r.Header)
   146  		if err != nil {
   147  			writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
   148  			return
   149  		}
   150  
   151  		zipInfo, err = updateObjectMetadataWithZipInfo(ctx, objectAPI, bucket, zipPath, opts)
   152  	}
   153  	if err != nil {
   154  		writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
   155  		return
   156  	}
   157  	file, err := zipindex.FindSerialized(zipInfo, object)
   158  	if err != nil {
   159  		if err == io.EOF {
   160  			writeErrorResponse(ctx, w, errorCodes.ToAPIErr(ErrNoSuchKey), r.URL)
   161  		} else {
   162  			writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
   163  		}
   164  		return
   165  	}
   166  
   167  	// New object info
   168  	fileObjInfo := ObjectInfo{
   169  		Bucket:  bucket,
   170  		Name:    object,
   171  		Size:    int64(file.UncompressedSize64),
   172  		ModTime: zipObjInfo.ModTime,
   173  	}
   174  
   175  	var rc io.ReadCloser
   176  
   177  	if file.UncompressedSize64 > 0 {
   178  		// There may be number of header bytes before the content.
   179  		// Reading 64K extra. This should more than cover name and any "extra" details.
   180  		end := file.Offset + int64(file.CompressedSize64) + 64<<10
   181  		if end > zipObjInfo.Size {
   182  			end = zipObjInfo.Size
   183  		}
   184  		rs := &HTTPRangeSpec{Start: file.Offset, End: end}
   185  		gr, err := objectAPI.GetObjectNInfo(ctx, bucket, zipPath, rs, nil, opts)
   186  		if err != nil {
   187  			writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
   188  			return
   189  		}
   190  		defer gr.Close()
   191  		rc, err = file.Open(gr)
   192  		if err != nil {
   193  			writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
   194  			return
   195  		}
   196  	} else {
   197  		rc = io.NopCloser(bytes.NewReader([]byte{}))
   198  	}
   199  
   200  	defer rc.Close()
   201  
   202  	if err = setObjectHeaders(w, fileObjInfo, nil, opts); err != nil {
   203  		writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
   204  		return
   205  	}
   206  	// s3zip does not allow ranges
   207  	w.Header().Del(xhttp.AcceptRanges)
   208  
   209  	setHeadGetRespHeaders(w, r.Form)
   210  
   211  	httpWriter := xioutil.WriteOnClose(w)
   212  
   213  	// Write object content to response body
   214  	if _, err = xioutil.Copy(httpWriter, rc); err != nil {
   215  		if !httpWriter.HasWritten() {
   216  			// write error response only if no data or headers has been written to client yet
   217  			writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
   218  			return
   219  		}
   220  		return
   221  	}
   222  
   223  	if err = httpWriter.Close(); err != nil {
   224  		if !httpWriter.HasWritten() { // write error response only if no data or headers has been written to client yet
   225  			writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
   226  			return
   227  		}
   228  		return
   229  	}
   230  }
   231  
   232  // listObjectsV2InArchive generates S3 listing result ListObjectsV2Info from zip file, all parameters are already validated by the caller.
   233  func listObjectsV2InArchive(ctx context.Context, objectAPI ObjectLayer, bucket, prefix, token, delimiter string, maxKeys int, fetchOwner bool, startAfter string) (ListObjectsV2Info, error) {
   234  	zipPath, _, err := splitZipExtensionPath(prefix)
   235  	if err != nil {
   236  		// Return empty listing
   237  		return ListObjectsV2Info{}, nil
   238  	}
   239  
   240  	zipObjInfo, err := objectAPI.GetObjectInfo(ctx, bucket, zipPath, ObjectOptions{})
   241  	if err != nil {
   242  		// Return empty listing
   243  		return ListObjectsV2Info{}, nil
   244  	}
   245  
   246  	zipInfo := zipObjInfo.ArchiveInfo()
   247  	if len(zipInfo) == 0 {
   248  		// Always update the latest version
   249  		zipInfo, err = updateObjectMetadataWithZipInfo(ctx, objectAPI, bucket, zipPath, ObjectOptions{})
   250  	}
   251  	if err != nil {
   252  		return ListObjectsV2Info{}, err
   253  	}
   254  
   255  	files, err := zipindex.DeserializeFiles(zipInfo)
   256  	if err != nil {
   257  		return ListObjectsV2Info{}, err
   258  	}
   259  
   260  	sort.Slice(files, func(i, j int) bool {
   261  		return files[i].Name < files[j].Name
   262  	})
   263  
   264  	var (
   265  		count           int
   266  		isTruncated     bool
   267  		nextToken       string
   268  		listObjectsInfo ListObjectsV2Info
   269  	)
   270  
   271  	// Always set this
   272  	listObjectsInfo.ContinuationToken = token
   273  
   274  	// Open and iterate through the files in the archive.
   275  	for _, file := range files {
   276  		objName := zipObjInfo.Name + archiveSeparator + file.Name
   277  		if objName <= startAfter || objName <= token {
   278  			continue
   279  		}
   280  		if strings.HasPrefix(objName, prefix) {
   281  			if count == maxKeys {
   282  				isTruncated = true
   283  				break
   284  			}
   285  			if delimiter != "" {
   286  				i := strings.Index(objName[len(prefix):], delimiter)
   287  				if i >= 0 {
   288  					commonPrefix := objName[:len(prefix)+i+1]
   289  					if len(listObjectsInfo.Prefixes) == 0 || commonPrefix != listObjectsInfo.Prefixes[len(listObjectsInfo.Prefixes)-1] {
   290  						listObjectsInfo.Prefixes = append(listObjectsInfo.Prefixes, commonPrefix)
   291  						count++
   292  					}
   293  					goto next
   294  				}
   295  			}
   296  			listObjectsInfo.Objects = append(listObjectsInfo.Objects, ObjectInfo{
   297  				Bucket:  bucket,
   298  				Name:    objName,
   299  				Size:    int64(file.UncompressedSize64),
   300  				ModTime: zipObjInfo.ModTime,
   301  			})
   302  			count++
   303  		}
   304  	next:
   305  		nextToken = objName
   306  	}
   307  
   308  	if isTruncated {
   309  		listObjectsInfo.IsTruncated = true
   310  		listObjectsInfo.NextContinuationToken = nextToken
   311  	}
   312  
   313  	return listObjectsInfo, nil
   314  }
   315  
   316  // getFilesFromZIPObject reads a partial stream of a zip file to build the zipindex.Files index
   317  func getFilesListFromZIPObject(ctx context.Context, objectAPI ObjectLayer, bucket, object string, opts ObjectOptions) (zipindex.Files, ObjectInfo, error) {
   318  	size := 1 << 20
   319  	var objSize int64
   320  	for {
   321  		rs := &HTTPRangeSpec{IsSuffixLength: true, Start: int64(-size)}
   322  		gr, err := objectAPI.GetObjectNInfo(ctx, bucket, object, rs, nil, opts)
   323  		if err != nil {
   324  			return nil, ObjectInfo{}, err
   325  		}
   326  		b, err := io.ReadAll(gr)
   327  		gr.Close()
   328  		if err != nil {
   329  			return nil, ObjectInfo{}, err
   330  		}
   331  		if size > len(b) {
   332  			size = len(b)
   333  		}
   334  
   335  		// Calculate the object real size if encrypted
   336  		if _, ok := crypto.IsEncrypted(gr.ObjInfo.UserDefined); ok {
   337  			objSize, err = gr.ObjInfo.DecryptedSize()
   338  			if err != nil {
   339  				return nil, ObjectInfo{}, err
   340  			}
   341  		} else {
   342  			objSize = gr.ObjInfo.Size
   343  		}
   344  
   345  		files, err := zipindex.ReadDir(b[len(b)-size:], objSize, nil)
   346  		if err == nil {
   347  			return files, gr.ObjInfo, nil
   348  		}
   349  		var terr zipindex.ErrNeedMoreData
   350  		if errors.As(err, &terr) {
   351  			size = int(terr.FromEnd)
   352  			if size <= 0 || size > 100<<20 {
   353  				return nil, ObjectInfo{}, errors.New("zip directory too large")
   354  			}
   355  		} else {
   356  			return nil, ObjectInfo{}, err
   357  		}
   358  	}
   359  }
   360  
   361  // headObjectInArchiveFileHandler - HEAD Object in an archive file
   362  func (api objectAPIHandlers) headObjectInArchiveFileHandler(ctx context.Context, objectAPI ObjectLayer, bucket, object string, w http.ResponseWriter, r *http.Request) {
   363  	if crypto.S3.IsRequested(r.Header) || crypto.S3KMS.IsRequested(r.Header) { // If SSE-S3 or SSE-KMS present -> AWS fails with undefined error
   364  		writeErrorResponseHeadersOnly(w, errorCodes.ToAPIErr(ErrBadRequest))
   365  		return
   366  	}
   367  
   368  	zipPath, object, err := splitZipExtensionPath(object)
   369  	if err != nil {
   370  		writeErrorResponseHeadersOnly(w, toAPIError(ctx, err))
   371  		return
   372  	}
   373  
   374  	getObjectInfo := objectAPI.GetObjectInfo
   375  
   376  	opts, err := getOpts(ctx, r, bucket, zipPath)
   377  	if err != nil {
   378  		writeErrorResponseHeadersOnly(w, toAPIError(ctx, err))
   379  		return
   380  	}
   381  
   382  	if s3Error := checkRequestAuthType(ctx, r, policy.GetObjectAction, bucket, zipPath); s3Error != ErrNone {
   383  		if getRequestAuthType(r) == authTypeAnonymous {
   384  			// As per "Permission" section in
   385  			// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
   386  			// If the object you request does not exist,
   387  			// the error Amazon S3 returns depends on
   388  			// whether you also have the s3:ListBucket
   389  			// permission.
   390  			// * If you have the s3:ListBucket permission
   391  			//   on the bucket, Amazon S3 will return an
   392  			//   HTTP status code 404 ("no such key")
   393  			//   error.
   394  			// * if you don’t have the s3:ListBucket
   395  			//   permission, Amazon S3 will return an HTTP
   396  			//   status code 403 ("access denied") error.`
   397  			if globalPolicySys.IsAllowed(policy.BucketPolicyArgs{
   398  				Action:          policy.ListBucketAction,
   399  				BucketName:      bucket,
   400  				ConditionValues: getConditionValues(r, "", auth.AnonymousCredentials),
   401  				IsOwner:         false,
   402  			}) {
   403  				_, err = getObjectInfo(ctx, bucket, zipPath, opts)
   404  				if toAPIError(ctx, err).Code == "NoSuchKey" {
   405  					s3Error = ErrNoSuchKey
   406  				}
   407  			}
   408  		}
   409  		errCode := errorCodes.ToAPIErr(s3Error)
   410  		w.Header().Set(xMinIOErrCodeHeader, errCode.Code)
   411  		w.Header().Set(xMinIOErrDescHeader, "\""+errCode.Description+"\"")
   412  		writeErrorResponseHeadersOnly(w, errCode)
   413  		return
   414  	}
   415  
   416  	// Validate pre-conditions if any.
   417  	opts.CheckPrecondFn = func(oi ObjectInfo) bool {
   418  		return checkPreconditions(ctx, w, r, oi, opts)
   419  	}
   420  
   421  	// We do not allow offsetting into extracted files.
   422  	if opts.PartNumber != 0 {
   423  		writeErrorResponseHeadersOnly(w, errorCodes.ToAPIErr(ErrInvalidPartNumber))
   424  		return
   425  	}
   426  
   427  	if r.Header.Get(xhttp.Range) != "" {
   428  		writeErrorResponseHeadersOnly(w, errorCodes.ToAPIErr(ErrInvalidRange))
   429  		return
   430  	}
   431  
   432  	zipObjInfo, err := getObjectInfo(ctx, bucket, zipPath, opts)
   433  	if err != nil {
   434  		writeErrorResponseHeadersOnly(w, toAPIError(ctx, err))
   435  		return
   436  	}
   437  
   438  	zipInfo := zipObjInfo.ArchiveInfo()
   439  	if len(zipInfo) == 0 {
   440  		opts.EncryptFn, err = zipObjInfo.metadataEncryptFn(r.Header)
   441  		if err != nil {
   442  			writeErrorResponseHeadersOnly(w, toAPIError(ctx, err))
   443  			return
   444  		}
   445  		zipInfo, err = updateObjectMetadataWithZipInfo(ctx, objectAPI, bucket, zipPath, opts)
   446  	}
   447  	if err != nil {
   448  		writeErrorResponseHeadersOnly(w, toAPIError(ctx, err))
   449  		return
   450  	}
   451  
   452  	file, err := zipindex.FindSerialized(zipInfo, object)
   453  	if err != nil {
   454  		if err == io.EOF {
   455  			writeErrorResponseHeadersOnly(w, errorCodes.ToAPIErr(ErrNoSuchKey))
   456  		} else {
   457  			writeErrorResponseHeadersOnly(w, toAPIError(ctx, err))
   458  		}
   459  		return
   460  	}
   461  
   462  	objInfo := ObjectInfo{
   463  		Bucket:  bucket,
   464  		Name:    file.Name,
   465  		Size:    int64(file.UncompressedSize64),
   466  		ModTime: zipObjInfo.ModTime,
   467  	}
   468  
   469  	// Set standard object headers.
   470  	if err = setObjectHeaders(w, objInfo, nil, opts); err != nil {
   471  		writeErrorResponseHeadersOnly(w, toAPIError(ctx, err))
   472  		return
   473  	}
   474  
   475  	// s3zip does not allow ranges.
   476  	w.Header().Del(xhttp.AcceptRanges)
   477  
   478  	// Set any additional requested response headers.
   479  	setHeadGetRespHeaders(w, r.Form)
   480  
   481  	// Successful response.
   482  	w.WriteHeader(http.StatusOK)
   483  }
   484  
   485  // Update the passed zip object metadata with the zip contents info, file name, modtime, size, etc.
   486  // The returned zip index will de decrypted.
   487  func updateObjectMetadataWithZipInfo(ctx context.Context, objectAPI ObjectLayer, bucket, object string, opts ObjectOptions) ([]byte, error) {
   488  	files, srcInfo, err := getFilesListFromZIPObject(ctx, objectAPI, bucket, object, opts)
   489  	if err != nil {
   490  		return nil, err
   491  	}
   492  	files.OptimizeSize()
   493  	zipInfo, err := files.Serialize()
   494  	if err != nil {
   495  		return nil, err
   496  	}
   497  	at := archiveType
   498  	zipInfoStr := string(zipInfo)
   499  	if opts.EncryptFn != nil {
   500  		at = archiveTypeEnc
   501  		zipInfoStr = string(opts.EncryptFn(archiveTypeEnc, zipInfo))
   502  	}
   503  	srcInfo.UserDefined[archiveTypeMetadataKey] = at
   504  	popts := ObjectOptions{
   505  		MTime:     srcInfo.ModTime,
   506  		VersionID: srcInfo.VersionID,
   507  		EvalMetadataFn: func(oi *ObjectInfo, gerr error) (dsc ReplicateDecision, err error) {
   508  			oi.UserDefined[archiveTypeMetadataKey] = at
   509  			oi.UserDefined[archiveInfoMetadataKey] = zipInfoStr
   510  			return dsc, nil
   511  		},
   512  	}
   513  
   514  	// For all other modes use in-place update to update metadata on a specific version.
   515  	if _, err = objectAPI.PutObjectMetadata(ctx, bucket, object, popts); err != nil {
   516  		return nil, err
   517  	}
   518  
   519  	return zipInfo, nil
   520  }