github.com/cs3org/reva/v2@v2.27.7/pkg/storage/utils/decomposedfs/upload/upload.go (about)

     1  // Copyright 2018-2022 CERN
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // In applying this license, CERN does not waive the privileges and immunities
    16  // granted to it by virtue of its status as an Intergovernmental Organization
    17  // or submit itself to any jurisdiction.
    18  
    19  package upload
    20  
    21  import (
    22  	"context"
    23  	"encoding/hex"
    24  	"fmt"
    25  	"hash"
    26  	"io"
    27  	"io/fs"
    28  	"net/http"
    29  	"os"
    30  	"strconv"
    31  	"strings"
    32  	"time"
    33  
    34  	userpb "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1"
    35  	provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
    36  	"github.com/golang-jwt/jwt/v5"
    37  	"github.com/pkg/errors"
    38  	tusd "github.com/tus/tusd/v2/pkg/handler"
    39  	"go.opentelemetry.io/otel"
    40  	"go.opentelemetry.io/otel/trace"
    41  
    42  	"github.com/cs3org/reva/v2/pkg/appctx"
    43  	ctxpkg "github.com/cs3org/reva/v2/pkg/ctx"
    44  	"github.com/cs3org/reva/v2/pkg/errtypes"
    45  	"github.com/cs3org/reva/v2/pkg/events"
    46  	"github.com/cs3org/reva/v2/pkg/rhttp/datatx/metrics"
    47  	"github.com/cs3org/reva/v2/pkg/storage/utils/decomposedfs/metadata/prefixes"
    48  	"github.com/cs3org/reva/v2/pkg/storage/utils/decomposedfs/node"
    49  	"github.com/cs3org/reva/v2/pkg/utils"
    50  )
    51  
var (
	// tracer is the package-level OpenTelemetry tracer, assigned in init().
	tracer trace.Tracer
	// ErrAlreadyExists is the tusd error returned when the upload target already exists.
	ErrAlreadyExists = tusd.NewError("ERR_ALREADY_EXISTS", "file already exists", http.StatusConflict)
	// defaultFilePerm is the permission mode used when opening upload binary files.
	defaultFilePerm = os.FileMode(0664)
)
    57  
// init sets up the package-level tracer used by all spans in this file.
func init() {
	tracer = otel.Tracer("github.com/cs3org/reva/pkg/storage/utils/decomposedfs/upload")
}
    61  
    62  // WriteChunk writes the stream from the reader to the given offset of the upload
    63  func (session *OcisSession) WriteChunk(ctx context.Context, offset int64, src io.Reader) (int64, error) {
    64  	ctx, span := tracer.Start(session.Context(ctx), "WriteChunk")
    65  	defer span.End()
    66  	_, subspan := tracer.Start(ctx, "os.OpenFile")
    67  	file, err := os.OpenFile(session.binPath(), os.O_WRONLY|os.O_APPEND, defaultFilePerm)
    68  	subspan.End()
    69  	if err != nil {
    70  		return 0, err
    71  	}
    72  	defer file.Close()
    73  
    74  	// calculate cheksum here? needed for the TUS checksum extension. https://tus.io/protocols/resumable-upload.html#checksum
    75  	// TODO but how do we get the `Upload-Checksum`? WriteChunk() only has a context, offset and the reader ...
    76  	// It is sent with the PATCH request, well or in the POST when the creation-with-upload extension is used
    77  	// but the tus handler uses a context.Background() so we cannot really check the header and put it in the context ...
    78  	_, subspan = tracer.Start(ctx, "io.Copy")
    79  	n, err := io.Copy(file, src)
    80  	subspan.End()
    81  
    82  	// If the HTTP PATCH request gets interrupted in the middle (e.g. because
    83  	// the user wants to pause the upload), Go's net/http returns an io.ErrUnexpectedEOF.
    84  	// However, for the ocis driver it's not important whether the stream has ended
    85  	// on purpose or accidentally.
    86  	if err != nil && err != io.ErrUnexpectedEOF {
    87  		return n, err
    88  	}
    89  
    90  	// update upload.Session.Offset so subsequent code flow can use it.
    91  	// No need to persist the session as the offset is determined by stating the blob in the GetUpload / ReadSession codepath.
    92  	// The session offset is written to disk in FinishUpload
    93  	session.info.Offset += n
    94  	return n, nil
    95  }
    96  
    97  // GetInfo returns the FileInfo
    98  func (session *OcisSession) GetInfo(_ context.Context) (tusd.FileInfo, error) {
    99  	return session.ToFileInfo(), nil
   100  }
   101  
   102  // GetReader returns an io.Reader for the upload
   103  func (session *OcisSession) GetReader(ctx context.Context) (io.ReadCloser, error) {
   104  	_, span := tracer.Start(session.Context(ctx), "GetReader")
   105  	defer span.End()
   106  	return os.Open(session.binPath())
   107  }
   108  
   109  // FinishUpload finishes an upload and moves the file to the internal destination
   110  // implements tusd.DataStore interface
   111  // returns tusd errors
   112  func (session *OcisSession) FinishUpload(ctx context.Context) error {
   113  	err := session.FinishUploadDecomposed(ctx)
   114  
   115  	//  we need to return a tusd error here to make the tusd handler return the correct status code
   116  	switch err.(type) {
   117  	case errtypes.AlreadyExists:
   118  		return tusd.NewError("ERR_ALREADY_EXISTS", err.Error(), http.StatusConflict)
   119  	case errtypes.Aborted:
   120  		return tusd.NewError("ERR_PRECONDITION_FAILED", err.Error(), http.StatusPreconditionFailed)
   121  	default:
   122  		return err
   123  	}
   124  }
   125  
// FinishUploadDecomposed finishes an upload and moves the file to the internal destination
// returns errtypes errors
func (session *OcisSession) FinishUploadDecomposed(ctx context.Context) error {
	ctx, span := tracer.Start(session.Context(ctx), "FinishUpload")
	defer span.End()
	log := appctx.GetLogger(ctx)

	// record the initiator so downstream consumers can attribute the upload
	ctx = ctxpkg.ContextSetInitiator(ctx, session.InitiatorID())

	// compute all supported checksums of the uploaded bytes
	sha1h, md5h, adler32h, err := node.CalculateChecksums(ctx, session.binPath())
	if err != nil {
		return err
	}

	// compare if they match the sent checksum
	// TODO the tus checksum extension would do this on every chunk, but I currently don't see an easy way to pass in the requested checksum. for now we do it in FinishUpload which is also called for chunked uploads
	if session.info.MetaData["checksum"] != "" {
		var err error
		// expected format: "<algorithm> <hex digest>", e.g. "sha1 a94a8f..."
		parts := strings.SplitN(session.info.MetaData["checksum"], " ", 2)
		if len(parts) != 2 {
			return errtypes.BadRequest("invalid checksum format. must be '[algorithm] [checksum]'")
		}
		switch parts[0] {
		case "sha1":
			err = checkHash(parts[1], sha1h)
		case "md5":
			err = checkHash(parts[1], md5h)
		case "adler32":
			err = checkHash(parts[1], adler32h)
		default:
			err = errtypes.BadRequest("unsupported checksum algorithm: " + parts[0])
		}
		if err != nil {
			// checksum mismatch: discard the upload
			// NOTE(review): flags presumably mirror session.Cleanup(revertNodeMetadata, cleanBin, cleanInfo) — confirm against store.Cleanup
			session.store.Cleanup(ctx, session, true, false, false)
			return err
		}
	}

	// update checksums
	attrs := node.Attributes{
		prefixes.ChecksumPrefix + "sha1":    sha1h.Sum(nil),
		prefixes.ChecksumPrefix + "md5":     md5h.Sum(nil),
		prefixes.ChecksumPrefix + "adler32": adler32h.Sum(nil),
	}

	// At this point we scope by the space to create the final file in the final location
	if session.store.um != nil && session.info.Storage["SpaceGid"] != "" {
		gid, err := strconv.Atoi(session.info.Storage["SpaceGid"])
		if err != nil {
			return errors.Wrap(err, "failed to parse space gid")
		}

		// scope by gid only (-1 leaves the uid untouched)
		unscope, err := session.store.um.ScopeUserByIds(-1, gid)
		if err != nil {
			return errors.Wrap(err, "failed to scope user")
		}
		if unscope != nil {
			// restore the previous scope when this function returns
			defer func() { _ = unscope() }()
		}
	}

	n, err := session.store.CreateNodeForUpload(ctx, session, attrs)
	if err != nil {
		return err
	}
	// increase the processing counter for every started processing
	// will be decreased in Cleanup()
	metrics.UploadProcessing.Inc()
	metrics.UploadSessionsBytesReceived.Inc()

	// notify event consumers (e.g. async postprocessing) that the bytes arrived
	if session.store.pub != nil && session.info.Size > 0 {
		// NOTE(review): the ok result is discarded; if no user is in the
		// context, u is nil and u.Opaque below panics — confirm callers
		// always set a user
		u, _ := ctxpkg.ContextGetUser(ctx)
		s, err := session.URL(ctx)
		if err != nil {
			return err
		}

		var iu *userpb.User
		if utils.ExistsInOpaque(u.Opaque, "impersonating-user") {
			iu = &userpb.User{}
			if err := utils.ReadJSONFromOpaque(u.Opaque, "impersonating-user", iu); err != nil {
				return err
			}
		}

		if err := events.Publish(ctx, session.store.pub, events.BytesReceived{
			UploadID:          session.ID(),
			URL:               s,
			SpaceOwner:        n.SpaceOwnerOrManager(session.Context(ctx)),
			ExecutingUser:     u,
			ResourceID:        &provider.ResourceId{SpaceId: n.SpaceID, OpaqueId: n.ID},
			Filename:          session.Filename(),
			Filesize:          uint64(session.Size()),
			ImpersonatingUser: iu,
		}); err != nil {
			return err
		}
	}

	// if the upload is synchronous or the upload is empty, finalize it now
	// for 0-byte uploads we take a shortcut and finalize isn't called elsewhere
	if !session.store.async || session.info.Size == 0 {
		// handle postprocessing synchronously
		err = session.Finalize(ctx)
		session.store.Cleanup(ctx, session, err != nil, false, err == nil)
		if err != nil {
			log.Error().Err(err).Msg("failed to upload")
			return err
		}
		metrics.UploadSessionsFinalized.Inc()
	}

	// propagate the size difference up the tree
	return session.store.tp.Propagate(ctx, n, session.SizeDiff())
}
   240  
   241  // Terminate terminates the upload
   242  func (session *OcisSession) Terminate(_ context.Context) error {
   243  	session.Cleanup(true, true, true)
   244  	return nil
   245  }
   246  
   247  // DeclareLength updates the upload length information
   248  func (session *OcisSession) DeclareLength(ctx context.Context, length int64) error {
   249  	session.info.Size = length
   250  	session.info.SizeIsDeferred = false
   251  	return session.store.um.RunInBaseScope(func() error {
   252  		return session.Persist(session.Context(ctx))
   253  	})
   254  }
   255  
   256  // ConcatUploads concatenates multiple uploads
   257  func (session *OcisSession) ConcatUploads(_ context.Context, uploads []tusd.Upload) (err error) {
   258  	file, err := os.OpenFile(session.binPath(), os.O_WRONLY|os.O_APPEND, defaultFilePerm)
   259  	if err != nil {
   260  		return err
   261  	}
   262  	defer file.Close()
   263  
   264  	for _, partialUpload := range uploads {
   265  		fileUpload := partialUpload.(*OcisSession)
   266  
   267  		src, err := os.Open(fileUpload.binPath())
   268  		if err != nil {
   269  			return err
   270  		}
   271  		defer src.Close()
   272  
   273  		if _, err := io.Copy(file, src); err != nil {
   274  			return err
   275  		}
   276  	}
   277  
   278  	return
   279  }
   280  
   281  // Finalize finalizes the upload (eg moves the file to the internal destination)
   282  func (session *OcisSession) Finalize(ctx context.Context) (err error) {
   283  	ctx, span := tracer.Start(session.Context(ctx), "Finalize")
   284  	defer span.End()
   285  
   286  	revisionNode := node.New(session.SpaceID(), session.NodeID(), "", "", session.Size(), session.ID(),
   287  		provider.ResourceType_RESOURCE_TYPE_FILE, session.SpaceOwner(), session.store.lu)
   288  
   289  	// upload the data to the blobstore
   290  	_, subspan := tracer.Start(ctx, "WriteBlob")
   291  	err = session.store.tp.WriteBlob(revisionNode, session.binPath())
   292  	subspan.End()
   293  	if err != nil {
   294  		return errors.Wrap(err, "failed to upload file to blobstore")
   295  	}
   296  
   297  	return nil
   298  }
   299  
   300  func checkHash(expected string, h hash.Hash) error {
   301  	hash := hex.EncodeToString(h.Sum(nil))
   302  	if expected != hash {
   303  		return errtypes.ChecksumMismatch(fmt.Sprintf("invalid checksum: expected %s got %x", expected, hash))
   304  	}
   305  	return nil
   306  }
   307  
   308  func (session *OcisSession) removeNode(ctx context.Context) {
   309  	n, err := session.Node(ctx)
   310  	if err != nil {
   311  		appctx.GetLogger(ctx).Error().Str("session", session.ID()).Err(err).Msg("getting node from session failed")
   312  		return
   313  	}
   314  	if err := n.Purge(ctx); err != nil {
   315  		appctx.GetLogger(ctx).Error().Str("nodepath", n.InternalPath()).Err(err).Msg("purging node failed")
   316  	}
   317  }
   318  
// Cleanup cleans up after the upload is finished:
//   - revertNodeMetadata restores the node's metadata from its version node
//     (or deletes the node when this session created it and still owns it)
//   - cleanBin removes the binary file with the uploaded bytes
//   - cleanInfo purges the persisted session info
func (session *OcisSession) Cleanup(revertNodeMetadata, cleanBin, cleanInfo bool) {
	ctx := session.Context(context.Background())

	if revertNodeMetadata {
		n, err := session.Node(ctx)
		if err != nil {
			appctx.GetLogger(ctx).Error().Err(err).Str("sessionid", session.ID()).Msg("reading node for session failed")
		} else {
			if session.NodeExists() && session.info.MetaData["versionsPath"] != "" {
				// the node pre-existed: restore its previous state by copying
				// the relevant attributes back from the version node
				p := session.info.MetaData["versionsPath"]
				if err := session.store.lu.CopyMetadata(ctx, p, n.InternalPath(), func(attributeName string, value []byte) (newValue []byte, copy bool) {
					// only restore checksum, type, blob and mtime attributes
					return value, strings.HasPrefix(attributeName, prefixes.ChecksumPrefix) ||
						attributeName == prefixes.TypeAttr ||
						attributeName == prefixes.BlobIDAttr ||
						attributeName == prefixes.BlobsizeAttr ||
						attributeName == prefixes.MTimeAttr
				}, true); err != nil {
					appctx.GetLogger(ctx).Info().Str("versionpath", p).Str("nodepath", n.InternalPath()).Err(err).Msg("renaming version node failed")
				}

				// the version node has served its purpose, drop it
				if err := os.RemoveAll(p); err != nil {
					appctx.GetLogger(ctx).Info().Str("versionpath", p).Str("nodepath", n.InternalPath()).Err(err).Msg("error removing version")
				}

			} else {
				// if no other upload session is in progress (processing id != session id) or has finished (processing id == "")
				latestSession, err := n.ProcessingID(ctx)
				if err != nil {
					appctx.GetLogger(ctx).Error().Err(err).Str("spaceid", n.SpaceID).Str("nodeid", n.ID).Str("uploadid", session.ID()).Msg("reading processingid for session failed")
				}
				if latestSession == session.ID() {
					// actually delete the node
					session.removeNode(ctx)
				}
				// FIXME else if the upload has become a revision, delete the revision, or if it is the last one, delete the node
			}
		}
	}

	if cleanBin {
		// a missing bin file is fine — it may already have been removed
		if err := os.Remove(session.binPath()); err != nil && !errors.Is(err, fs.ErrNotExist) {
			appctx.GetLogger(ctx).Error().Str("path", session.binPath()).Err(err).Msg("removing upload failed")
		}
	}

	if cleanInfo {
		if err := session.Purge(ctx); err != nil && !errors.Is(err, fs.ErrNotExist) {
			appctx.GetLogger(ctx).Error().Err(err).Str("session", session.ID()).Msg("removing upload info failed")
		}
	}
}
   371  
   372  // URL returns a url to download an upload
   373  func (session *OcisSession) URL(_ context.Context) (string, error) {
   374  	type transferClaims struct {
   375  		jwt.RegisteredClaims
   376  		Target string `json:"target"`
   377  	}
   378  
   379  	u := joinurl(session.store.tknopts.DownloadEndpoint, "tus/", session.ID())
   380  	ttl := time.Duration(session.store.tknopts.TransferExpires) * time.Second
   381  	claims := transferClaims{
   382  		RegisteredClaims: jwt.RegisteredClaims{
   383  			ExpiresAt: jwt.NewNumericDate(time.Now().Add(ttl)),
   384  			Audience:  jwt.ClaimStrings{"reva"},
   385  			IssuedAt:  jwt.NewNumericDate(time.Now()),
   386  		},
   387  		Target: u,
   388  	}
   389  
   390  	t := jwt.NewWithClaims(jwt.GetSigningMethod("HS256"), claims)
   391  
   392  	tkn, err := t.SignedString([]byte(session.store.tknopts.TransferSharedSecret))
   393  	if err != nil {
   394  		return "", errors.Wrapf(err, "error signing token with claims %+v", claims)
   395  	}
   396  
   397  	return joinurl(session.store.tknopts.DataGatewayEndpoint, tkn), nil
   398  }
   399  
   400  // replace with url.JoinPath after switching to go1.19
   401  func joinurl(paths ...string) string {
   402  	var s strings.Builder
   403  	l := len(paths)
   404  	for i, p := range paths {
   405  		s.WriteString(p)
   406  		if !strings.HasSuffix(p, "/") && i != l-1 {
   407  			s.WriteString("/")
   408  		}
   409  	}
   410  
   411  	return s.String()
   412  }