github.com/cs3org/reva/v2@v2.27.7/pkg/rhttp/datatx/utils/download/download.go (about)

     1  // Copyright 2018-2021 CERN
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // In applying this license, CERN does not waive the privileges and immunities
    16  // granted to it by virtue of its status as an Intergovernmental Organization
    17  // or submit itself to any jurisdiction.
    18  
    19  // Package download provides a library to handle file download requests.
    20  package download
    21  
    22  import (
    23  	"context"
    24  	"fmt"
    25  	"io"
    26  	"mime/multipart"
    27  	"net/http"
    28  	"path"
    29  	"strconv"
    30  	"strings"
    31  
    32  	provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
    33  	"github.com/rs/zerolog"
    34  
    35  	"github.com/cs3org/reva/v2/internal/grpc/services/storageprovider"
    36  	"github.com/cs3org/reva/v2/internal/http/services/owncloud/ocdav/net"
    37  	"github.com/cs3org/reva/v2/pkg/appctx"
    38  	"github.com/cs3org/reva/v2/pkg/errtypes"
    39  	"github.com/cs3org/reva/v2/pkg/storage"
    40  	"github.com/cs3org/reva/v2/pkg/storagespace"
    41  	"github.com/cs3org/reva/v2/pkg/utils"
    42  )
    43  
    44  type contextKey struct{}
    45  
    46  var etagKey = contextKey{}
    47  
    48  // ContextWithEtag returns a new `context.Context` that holds an etag.
    49  func ContextWithEtag(ctx context.Context, etag string) context.Context {
    50  	return context.WithValue(ctx, etagKey, etag)
    51  }
    52  
    53  // EtagFromContext returns the etag previously associated with `ctx`, or
    54  // `""` if no such etag could be found.
    55  func EtagFromContext(ctx context.Context) string {
    56  	val := ctx.Value(etagKey)
    57  	if etag, ok := val.(string); ok {
    58  		return etag
    59  	}
    60  	return ""
    61  }
    62  
    63  // GetOrHeadFile returns the requested file content
    64  func GetOrHeadFile(w http.ResponseWriter, r *http.Request, fs storage.FS, spaceID string) {
    65  	ctx := r.Context()
    66  	sublog := appctx.GetLogger(ctx).With().Str("svc", "datatx").Str("handler", "download").Logger()
    67  
    68  	var fn string
    69  	files, ok := r.URL.Query()["filename"]
    70  	if !ok || len(files[0]) < 1 {
    71  		fn = r.URL.Path
    72  	} else {
    73  		fn = files[0]
    74  	}
    75  
    76  	var ref *provider.Reference
    77  	if spaceID == "" {
    78  		// ensure the absolute path starts with '/'
    79  		ref = &provider.Reference{Path: path.Join("/", fn)}
    80  	} else {
    81  		// build a storage space reference
    82  		rid, err := storagespace.ParseID(spaceID)
    83  		if err != nil {
    84  			handleError(w, &sublog, err, "parse ID")
    85  		}
    86  		ref = &provider.Reference{
    87  			ResourceId: &rid,
    88  			// ensure the relative path starts with '.'
    89  			Path: utils.MakeRelativePath(fn),
    90  		}
    91  	}
    92  	// TODO check preconditions like If-Range, If-Match ...
    93  
    94  	var md *provider.ResourceInfo
    95  	var content io.ReadCloser
    96  	var err error
    97  	var notModified bool
    98  
    99  	// do a stat to set Content-Length and etag headers
   100  
   101  	md, content, err = fs.Download(ctx, ref, func(md *provider.ResourceInfo) bool {
   102  		// range requests always need to open the reader to check if it is seekable
   103  		if r.Header.Get("Range") != "" {
   104  			return true
   105  		}
   106  		// otherwise, HEAD requests do not need to open a reader
   107  		if r.Method == "HEAD" {
   108  			return false
   109  		}
   110  
   111  		// check etag, see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match
   112  		for _, etag := range r.Header.Values(net.HeaderIfNoneMatch) {
   113  			if md.Etag == etag {
   114  				// When the condition fails for GET and HEAD methods, then the server must return
   115  				// HTTP status code 304 (Not Modified). [...] Note that the server generating a
   116  				// 304 response MUST generate any of the following header fields that would have
   117  				// been sent in a 200 (OK) response to the same request:
   118  				// Cache-Control, Content-Location, Date, ETag, Expires, and Vary.
   119  				notModified = true
   120  				return false
   121  			}
   122  		}
   123  		return true
   124  	})
   125  	if err != nil {
   126  		handleError(w, &sublog, err, "download")
   127  		return
   128  	}
   129  	if content != nil {
   130  		defer content.Close()
   131  	}
   132  	if notModified {
   133  		w.Header().Set(net.HeaderETag, md.Etag)
   134  		w.WriteHeader(http.StatusNotModified)
   135  		return
   136  	}
   137  
   138  	// fill in storage provider id if it is missing
   139  	if spaceID != "" && md.GetId().GetStorageId() == "" {
   140  		md.Id.StorageId = ref.ResourceId.StorageId
   141  	}
   142  
   143  	var ranges []HTTPRange
   144  
   145  	if r.Header.Get("Range") != "" {
   146  		ranges, err = ParseRange(r.Header.Get("Range"), int64(md.Size))
   147  		if err != nil {
   148  			if err == ErrNoOverlap {
   149  				w.Header().Set("Content-Range", fmt.Sprintf("bytes */%d", md.Size))
   150  			}
   151  			sublog.Error().Err(err).Interface("md", md).Interface("ranges", ranges).Msg("range request not satisfiable")
   152  			w.WriteHeader(http.StatusRequestedRangeNotSatisfiable)
   153  
   154  			return
   155  		}
   156  		if SumRangesSize(ranges) > int64(md.Size) {
   157  			// The total number of bytes in all the ranges
   158  			// is larger than the size of the file by
   159  			// itself, so this is probably an attack, or a
   160  			// dumb client. Ignore the range request.
   161  			ranges = nil
   162  		}
   163  	}
   164  
   165  	code := http.StatusOK
   166  	sendSize := int64(md.Size)
   167  	var sendContent io.Reader = content
   168  
   169  	var s io.Seeker
   170  	if s, ok = content.(io.Seeker); ok {
   171  		// tell clients they can send range requests
   172  		w.Header().Set("Accept-Ranges", "bytes")
   173  	}
   174  
   175  	w.Header().Set(net.HeaderContentType, strings.Join([]string{md.MimeType, "charset=UTF-8"}, "; "))
   176  
   177  	if len(ranges) > 0 {
   178  		sublog.Debug().Int64("start", ranges[0].Start).Int64("length", ranges[0].Length).Msg("range request")
   179  		if s == nil {
   180  			sublog.Error().Int64("start", ranges[0].Start).Int64("length", ranges[0].Length).Msg("ReadCloser is not seekable")
   181  			w.WriteHeader(http.StatusRequestedRangeNotSatisfiable)
   182  			return
   183  		}
   184  
   185  		code = http.StatusPartialContent
   186  
   187  		switch {
   188  		case len(ranges) == 1:
   189  			// RFC 7233, Section 4.1:
   190  			// "If a single part is being transferred, the server
   191  			// generating the 206 response MUST generate a
   192  			// Content-Range header field, describing what range
   193  			// of the selected representation is enclosed, and a
   194  			// payload consisting of the range.
   195  			// ...
   196  			// A server MUST NOT generate a multipart response to
   197  			// a request for a single range, since a client that
   198  			// does not request multiple parts might not support
   199  			// multipart responses."
   200  			ra := ranges[0]
   201  			if _, err := s.Seek(ra.Start, io.SeekStart); err != nil {
   202  				sublog.Error().Err(err).Int64("start", ra.Start).Int64("length", ra.Length).Msg("content is not seekable")
   203  				w.WriteHeader(http.StatusRequestedRangeNotSatisfiable)
   204  				return
   205  			}
   206  			sendSize = ra.Length
   207  			w.Header().Set("Content-Range", ra.ContentRange(int64(md.Size)))
   208  		case len(ranges) > 1:
   209  			sendSize = RangesMIMESize(ranges, md.MimeType, int64(md.Size))
   210  
   211  			pr, pw := io.Pipe()
   212  			mw := multipart.NewWriter(pw)
   213  			w.Header().Set("Content-Type", "multipart/byteranges; boundary="+mw.Boundary())
   214  			sendContent = pr
   215  			defer pr.Close() // cause writing goroutine to fail and exit if CopyN doesn't finish.
   216  			go func() {
   217  				for _, ra := range ranges {
   218  					part, err := mw.CreatePart(ra.MimeHeader(md.MimeType+"; charset=UTF-8", int64(md.Size)))
   219  					if err != nil {
   220  						_ = pw.CloseWithError(err) // CloseWithError always returns nil
   221  						return
   222  					}
   223  					if _, err := s.Seek(ra.Start, io.SeekStart); err != nil {
   224  						_ = pw.CloseWithError(err) // CloseWithError always returns nil
   225  						return
   226  					}
   227  					if _, err := io.CopyN(part, content, ra.Length); err != nil {
   228  						_ = pw.CloseWithError(err) // CloseWithError always returns nil
   229  						return
   230  					}
   231  				}
   232  				mw.Close()
   233  				pw.Close()
   234  			}()
   235  		}
   236  	}
   237  
   238  	if w.Header().Get(net.HeaderContentEncoding) == "" {
   239  		w.Header().Set(net.HeaderContentLength, strconv.FormatInt(sendSize, 10))
   240  	}
   241  
   242  	w.Header().Set(net.HeaderContentDisposistion, net.ContentDispositionAttachment(path.Base(md.Path)))
   243  	w.Header().Set(net.HeaderETag, md.Etag)
   244  	w.Header().Set(net.HeaderOCFileID, storagespace.FormatResourceID(md.Id))
   245  	w.Header().Set(net.HeaderOCETag, md.Etag)
   246  	w.Header().Set(net.HeaderLastModified, net.RFC1123Z(md.Mtime))
   247  
   248  	if md.Checksum != nil {
   249  		w.Header().Set(net.HeaderOCChecksum, fmt.Sprintf("%s:%s", strings.ToUpper(string(storageprovider.GRPC2PKGXS(md.Checksum.Type))), md.Checksum.Sum))
   250  	}
   251  
   252  	w.WriteHeader(code)
   253  
   254  	if r.Method != "HEAD" {
   255  		var c int64
   256  		c, err = io.CopyN(w, sendContent, sendSize)
   257  		if err != nil {
   258  			sublog.Error().Err(err).Interface("resourceid", md.Id).Msg("error copying data to response")
   259  			return
   260  		}
   261  		if c != sendSize {
   262  			sublog.Error().Int64("copied", c).Int64("size", sendSize).Msg("copied vs size mismatch")
   263  		}
   264  	}
   265  }
   266  
   267  func handleError(w http.ResponseWriter, log *zerolog.Logger, err error, action string) {
   268  	switch err.(type) {
   269  	case errtypes.IsNotFound:
   270  		log.Debug().Err(err).Str("action", action).Msg("file not found")
   271  		w.WriteHeader(http.StatusNotFound)
   272  	case errtypes.IsPermissionDenied:
   273  		log.Debug().Err(err).Str("action", action).Msg("permission denied")
   274  		w.WriteHeader(http.StatusForbidden)
   275  	case errtypes.Aborted:
   276  		log.Debug().Err(err).Str("action", action).Msg("etags do not match")
   277  		w.WriteHeader(http.StatusPreconditionFailed)
   278  	default:
   279  		log.Error().Err(err).Str("action", action).Msg("unexpected error")
   280  		w.WriteHeader(http.StatusInternalServerError)
   281  	}
   282  }