github.com/cs3org/reva/v2@v2.27.7/pkg/ocm/storage/received/upload.go (about)

     1  // Copyright 2018-2023 CERN
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // In applying this license, CERN does not waive the privileges and immunities
    16  // granted to it by virtue of its status as an Intergovernmental Organization
    17  // or submit itself to any jurisdiction.
    18  
    19  package ocm
    20  
    21  import (
    22  	"context"
    23  	"crypto/md5"
    24  	"crypto/sha1"
    25  	"encoding/hex"
    26  	"encoding/json"
    27  	"errors"
    28  	"fmt"
    29  	"hash"
    30  	"hash/adler32"
    31  	"io"
    32  	"net/http"
    33  	"os"
    34  	"path/filepath"
    35  	"strings"
    36  
    37  	"github.com/google/uuid"
    38  	tusd "github.com/tus/tusd/v2/pkg/handler"
    39  
    40  	userpb "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1"
    41  	provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
    42  	"github.com/cs3org/reva/v2/pkg/appctx"
    43  	ctxpkg "github.com/cs3org/reva/v2/pkg/ctx"
    44  	"github.com/cs3org/reva/v2/pkg/errtypes"
    45  	"github.com/cs3org/reva/v2/pkg/storage"
    46  	"github.com/cs3org/reva/v2/pkg/utils"
    47  )
    48  
// defaultFilePerm is the permission mode (rw-rw-r--) used for the upload's
// binary chunk file and its accompanying .info file.
var defaultFilePerm = os.FileMode(0664)
    50  
    51  func (d *driver) ListUploadSessions(ctx context.Context, filter storage.UploadSessionFilter) ([]storage.UploadSession, error) {
    52  	return []storage.UploadSession{}, nil
    53  }
    54  func (d *driver) InitiateUpload(ctx context.Context, ref *provider.Reference, uploadLength int64, metadata map[string]string) (map[string]string, error) {
    55  	shareID, rel := shareInfoFromReference(ref)
    56  	p := getPathFromShareIDAndRelPath(shareID, rel)
    57  
    58  	info := tusd.FileInfo{
    59  		MetaData: tusd.MetaData{
    60  			"filename": filepath.Base(p),
    61  			"dir":      filepath.Dir(p),
    62  		},
    63  		Size: uploadLength,
    64  	}
    65  
    66  	upload, err := d.NewUpload(ctx, info)
    67  	if err != nil {
    68  		return nil, err
    69  	}
    70  
    71  	info, _ = upload.GetInfo(ctx)
    72  
    73  	return map[string]string{
    74  		"simple": info.ID,
    75  		"tus":    info.ID,
    76  	}, nil
    77  }
    78  
// Upload streams the request body straight to the remote storage provider via
// webdav for the resource referenced in req.Ref and returns an (empty)
// ResourceInfo. The local upload state files are removed when the function
// returns, whether or not the transfer succeeded.
//
// NOTE(review): GetUpload is keyed with the share's opaque id here, while
// NewUpload stores uploads under a freshly generated uuid — confirm that
// callers of the "simple" protocol encode the upload id in the reference.
func (d *driver) Upload(ctx context.Context, req storage.UploadRequest, _ storage.UploadFinishedFunc) (*provider.ResourceInfo, error) {
	shareID, _ := shareInfoFromReference(req.Ref)
	u, err := d.GetUpload(ctx, shareID.OpaqueId)
	if err != nil {
		return &provider.ResourceInfo{}, err
	}

	info, err := u.GetInfo(ctx)
	if err != nil {
		return &provider.ResourceInfo{}, err
	}

	// Remove the local upload state (bin + .info file) once we are done.
	defer cleanup(&upload{Info: info})

	client, _, rel, err := d.webdavClient(ctx, nil, &provider.Reference{
		Path: filepath.Join(info.MetaData["dir"], info.MetaData["filename"]),
	})
	if err != nil {
		return &provider.ResourceInfo{}, err
	}
	client.SetInterceptor(func(method string, rq *http.Request) {
		// Set the content length on the request struct directly instead of the header.
		// The content-length header gets reset by the golang http library before
		// sending out the request, resulting in chunked encoding to be used which
		// breaks the quota checks in ocdav.
		if method == "PUT" {
			rq.ContentLength = req.Length
		}
	})

	// Forward a lock token from the request context (if any) to the remote write.
	locktoken, _ := ctxpkg.ContextGetLockID(ctx)
	return &provider.ResourceInfo{}, client.WriteStream(rel, req.Body, 0, locktoken)
}
   112  
   113  // UseIn tells the tus upload middleware which extensions it supports.
   114  func (d *driver) UseIn(composer *tusd.StoreComposer) {
   115  	composer.UseCore(d)
   116  	composer.UseTerminater(d)
   117  	composer.UseConcater(d)
   118  	composer.UseLengthDeferrer(d)
   119  }
   120  
   121  // AsTerminatableUpload returns a TerminatableUpload
   122  // To implement the termination extension as specified in https://tus.io/protocols/resumable-upload.html#termination
   123  // the storage needs to implement AsTerminatableUpload
   124  func (d *driver) AsTerminatableUpload(up tusd.Upload) tusd.TerminatableUpload {
   125  	return up.(*upload)
   126  }
   127  
   128  // AsLengthDeclarableUpload returns a LengthDeclarableUpload
   129  // To implement the creation-defer-length extension as specified in https://tus.io/protocols/resumable-upload.html#creation
   130  // the storage needs to implement AsLengthDeclarableUpload
   131  func (d *driver) AsLengthDeclarableUpload(up tusd.Upload) tusd.LengthDeclarableUpload {
   132  	return up.(*upload)
   133  }
   134  
   135  // AsConcatableUpload returns a ConcatableUpload
   136  // To implement the concatenation extension as specified in https://tus.io/protocols/resumable-upload.html#concatenation
   137  // the storage needs to implement AsConcatableUpload
   138  func (d *driver) AsConcatableUpload(up tusd.Upload) tusd.ConcatableUpload {
   139  	return up.(*upload)
   140  }
   141  
   142  // To implement the core tus.io protocol as specified in https://tus.io/protocols/resumable-upload.html#core-protocol
   143  // - the storage needs to implement NewUpload and GetUpload
   144  // - the upload needs to implement the tusd.Upload interface: WriteChunk, GetInfo, GetReader and FinishUpload
   145  
// NewUpload returns a new tus Upload instance.
// It delegates to the package-level NewUpload, using the driver's configured
// storage root as the location for the upload state files.
func (d *driver) NewUpload(ctx context.Context, info tusd.FileInfo) (tusd.Upload, error) {
	return NewUpload(ctx, d, d.c.StorageRoot, info)
}
   150  
// GetUpload returns the Upload for the given upload id.
// It delegates to the package-level GetUpload, reading the upload state from
// the driver's configured storage root.
func (d *driver) GetUpload(ctx context.Context, id string) (tusd.Upload, error) {
	return GetUpload(ctx, d, d.c.StorageRoot, id)
}
   155  func NewUpload(ctx context.Context, d *driver, storageRoot string, info tusd.FileInfo) (tusd.Upload, error) {
   156  	if info.MetaData["filename"] == "" {
   157  		return nil, errors.New("Decomposedfs: missing filename in metadata")
   158  	}
   159  	if info.MetaData["dir"] == "" {
   160  		return nil, errors.New("Decomposedfs: missing dir in metadata")
   161  	}
   162  
   163  	uploadRoot := filepath.Join(storageRoot, "uploads")
   164  	info.ID = uuid.New().String()
   165  
   166  	user, ok := ctxpkg.ContextGetUser(ctx)
   167  	if !ok {
   168  		return nil, errors.New("no user in context")
   169  	}
   170  	info.MetaData["user"] = user.GetId().GetOpaqueId()
   171  	info.MetaData["idp"] = user.GetId().GetIdp()
   172  
   173  	info.Storage = map[string]string{
   174  		"Type": "OCM",
   175  		"Path": uploadRoot,
   176  	}
   177  
   178  	u := &upload{
   179  		Info: info,
   180  		Ctx:  ctx,
   181  		d:    d,
   182  	}
   183  
   184  	err := os.MkdirAll(uploadRoot, 0755)
   185  	if err != nil {
   186  		return nil, err
   187  	}
   188  
   189  	file, err := os.OpenFile(u.BinPath(), os.O_CREATE|os.O_WRONLY, defaultFilePerm)
   190  	if err != nil {
   191  		return nil, err
   192  	}
   193  	defer file.Close()
   194  
   195  	err = u.Persist()
   196  	if err != nil {
   197  		return nil, err
   198  	}
   199  	return u, nil
   200  }
   201  
   202  func GetUpload(ctx context.Context, d *driver, storageRoot string, id string) (tusd.Upload, error) {
   203  	info := tusd.FileInfo{}
   204  	data, err := os.ReadFile(filepath.Join(storageRoot, "uploads", id+".info"))
   205  	if err != nil {
   206  		return nil, err
   207  	}
   208  	err = json.Unmarshal(data, &info)
   209  	if err != nil {
   210  		return nil, err
   211  	}
   212  	upload := &upload{
   213  		Info: info,
   214  		Ctx:  ctx,
   215  		d:    d,
   216  	}
   217  	return upload, nil
   218  }
   219  
// upload implements the tusd.Upload interface for the OCM received-share
// storage. Its state lives in two files below the uploads directory:
// <id> (the uploaded bytes, see BinPath) and <id>.info (the serialized
// FileInfo, see InfoPath).
type upload struct {
	// Info holds the tus file info: id, size, offset, metadata and the
	// storage map whose "Path" entry points at the uploads directory.
	Info tusd.FileInfo
	// Ctx is the context the upload was created with.
	// NOTE(review): storing a context in a struct is discouraged; it appears
	// to be kept so FinishUpload can reuse the original request's logger.
	Ctx  context.Context

	// d is the back-reference to the driver, used for gateway/webdav access.
	d *driver
}
   226  
// InfoPath returns the path of the .info file holding the serialized upload
// state, located in the uploads directory recorded in Info.Storage["Path"].
func (u *upload) InfoPath() string {
	return filepath.Join(u.Info.Storage["Path"], u.Info.ID+".info")
}
   230  
// BinPath returns the path of the binary file receiving the uploaded bytes,
// located in the uploads directory recorded in Info.Storage["Path"].
func (u *upload) BinPath() string {
	return filepath.Join(u.Info.Storage["Path"], u.Info.ID)
}
   234  
   235  func (u *upload) Persist() error {
   236  	data, err := json.Marshal(u.Info)
   237  	if err != nil {
   238  		return err
   239  	}
   240  	return os.WriteFile(u.InfoPath(), data, defaultFilePerm)
   241  }
   242  
   243  func (u *upload) WriteChunk(ctx context.Context, offset int64, src io.Reader) (int64, error) {
   244  	file, err := os.OpenFile(u.BinPath(), os.O_WRONLY|os.O_APPEND, defaultFilePerm)
   245  	if err != nil {
   246  		return 0, err
   247  	}
   248  	defer file.Close()
   249  
   250  	// calculate cheksum here? needed for the TUS checksum extension. https://tus.io/protocols/resumable-upload.html#checksum
   251  	// TODO but how do we get the `Upload-Checksum`? WriteChunk() only has a context, offset and the reader ...
   252  	// It is sent with the PATCH request, well or in the POST when the creation-with-upload extension is used
   253  	// but the tus handler uses a context.Background() so we cannot really check the header and put it in the context ...
   254  	n, err := io.Copy(file, src)
   255  
   256  	// If the HTTP PATCH request gets interrupted in the middle (e.g. because
   257  	// the user wants to pause the upload), Go's net/http returns an io.ErrUnexpectedEOF.
   258  	// However, for the ocis driver it's not important whether the stream has ended
   259  	// on purpose or accidentally.
   260  	if err != nil && err != io.ErrUnexpectedEOF {
   261  		return n, err
   262  	}
   263  
   264  	u.Info.Offset += n
   265  	return n, u.Persist()
   266  }
   267  
// GetInfo returns the in-memory tus FileInfo of this upload. It never fails.
func (u *upload) GetInfo(ctx context.Context) (tusd.FileInfo, error) {
	return u.Info, nil
}
   271  
// GetReader opens the upload's binary file for reading.
// The caller is responsible for closing the returned ReadCloser.
func (u *upload) GetReader(ctx context.Context) (io.ReadCloser, error) {
	return os.Open(u.BinPath())
}
   275  
   276  func (u *upload) FinishUpload(ctx context.Context) error {
   277  	log := appctx.GetLogger(u.Ctx)
   278  
   279  	// calculate the checksum of the written bytes
   280  	// they will all be written to the metadata later, so we cannot omit any of them
   281  	// TODO only calculate the checksum in sync that was requested to match, the rest could be async ... but the tests currently expect all to be present
   282  	// TODO the hashes all implement BinaryMarshaler so we could try to persist the state for resumable upload. we would neet do keep track of the copied bytes ...
   283  	sha1h := sha1.New()
   284  	md5h := md5.New()
   285  	adler32h := adler32.New()
   286  	{
   287  		f, err := os.Open(u.BinPath())
   288  		if err != nil {
   289  			// we can continue if no oc checksum header is set
   290  			log.Info().Err(err).Str("binPath", u.BinPath()).Msg("error opening binPath")
   291  		}
   292  		defer f.Close()
   293  
   294  		r1 := io.TeeReader(f, sha1h)
   295  		r2 := io.TeeReader(r1, md5h)
   296  
   297  		_, err = io.Copy(adler32h, r2)
   298  		if err != nil {
   299  			log.Info().Err(err).Msg("error copying checksums")
   300  		}
   301  	}
   302  
   303  	defer cleanup(u)
   304  	// compare if they match the sent checksum
   305  	// TODO the tus checksum extension would do this on every chunk, but I currently don't see an easy way to pass in the requested checksum. for now we do it in FinishUpload which is also called for chunked uploads
   306  	if u.Info.MetaData["checksum"] != "" {
   307  		var err error
   308  		parts := strings.SplitN(u.Info.MetaData["checksum"], " ", 2)
   309  		if len(parts) != 2 {
   310  			return errtypes.BadRequest("invalid checksum format. must be '[algorithm] [checksum]'")
   311  		}
   312  		switch parts[0] {
   313  		case "sha1":
   314  			err = u.checkHash(parts[1], sha1h)
   315  		case "md5":
   316  			err = u.checkHash(parts[1], md5h)
   317  		case "adler32":
   318  			err = u.checkHash(parts[1], adler32h)
   319  		default:
   320  			err = errtypes.BadRequest("unsupported checksum algorithm: " + parts[0])
   321  		}
   322  		if err != nil {
   323  			return err
   324  		}
   325  	}
   326  
   327  	// send to the remote storage via webdav
   328  	// shareID, rel := shareInfoFromReference(u.Info.MetaData["ref"])
   329  	// p := getPathFromShareIDAndRelPath(shareID, rel)
   330  
   331  	gwc, err := u.d.gateway.Next()
   332  	if err != nil {
   333  		return err
   334  	}
   335  	serviceUserCtx, err := utils.GetServiceUserContext(u.d.c.ServiceAccountID, gwc, u.d.c.ServiceAccountSecret)
   336  	if err != nil {
   337  		return err
   338  	}
   339  	client, _, rel, err := u.d.webdavClient(serviceUserCtx, &userpb.UserId{
   340  		OpaqueId: u.Info.MetaData["user"],
   341  		Idp:      u.Info.MetaData["idp"],
   342  	}, &provider.Reference{
   343  		Path: filepath.Join(u.Info.MetaData["dir"], u.Info.MetaData["filename"]),
   344  	})
   345  	if err != nil {
   346  		return err
   347  	}
   348  
   349  	client.SetInterceptor(func(method string, rq *http.Request) {
   350  		// Set the content length on the request struct directly instead of the header.
   351  		// The content-length header gets reset by the golang http library before
   352  		// sendind out the request, resulting in chunked encoding to be used which
   353  		// breaks the quota checks in ocdav.
   354  		if method == "PUT" {
   355  			rq.ContentLength = u.Info.Size
   356  		}
   357  	})
   358  
   359  	f, err := os.Open(u.BinPath())
   360  	if err != nil {
   361  		return err
   362  	}
   363  	defer f.Close()
   364  	return client.WriteStream(rel, f, 0, "")
   365  }
   366  
// Terminate implements the tus termination extension by removing the upload's
// local binary and .info files. It never returns an error: removal failures
// (e.g. already-deleted files) are ignored by cleanup.
func (u *upload) Terminate(ctx context.Context) error {
	cleanup(u)
	return nil
}
   371  
   372  func (u *upload) ConcatUploads(_ context.Context, uploads []tusd.Upload) error {
   373  	file, err := os.OpenFile(u.BinPath(), os.O_WRONLY|os.O_APPEND, defaultFilePerm)
   374  	if err != nil {
   375  		return err
   376  	}
   377  	defer file.Close()
   378  
   379  	for _, partialUpload := range uploads {
   380  		fileUpload := partialUpload.(*upload)
   381  
   382  		src, err := os.Open(fileUpload.BinPath())
   383  		if err != nil {
   384  			return err
   385  		}
   386  		defer src.Close()
   387  
   388  		if _, err := io.Copy(file, src); err != nil {
   389  			return err
   390  		}
   391  	}
   392  	return nil
   393  }
   394  
   395  func (u *upload) DeclareLength(ctx context.Context, length int64) error {
   396  	u.Info.Size = length
   397  	u.Info.SizeIsDeferred = false
   398  	return nil
   399  }
   400  
   401  func (u *upload) checkHash(expected string, h hash.Hash) error {
   402  	if expected != hex.EncodeToString(h.Sum(nil)) {
   403  		return errtypes.ChecksumMismatch(fmt.Sprintf("invalid checksum: expected %s got %x", u.Info.MetaData["checksum"], h.Sum(nil)))
   404  	}
   405  	return nil
   406  }
   407  
   408  func cleanup(u *upload) {
   409  	if u == nil {
   410  		return
   411  	}
   412  	_ = os.Remove(u.BinPath())
   413  	_ = os.Remove(u.InfoPath())
   414  }