github.com/cs3org/reva/v2@v2.27.7/internal/http/services/archiver/handler.go (about)

     1  // Copyright 2018-2021 CERN
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // In applying this license, CERN does not waive the privileges and immunities
    16  // granted to it by virtue of its status as an Intergovernmental Organization
    17  // or submit itself to any jurisdiction.
    18  
    19  package archiver
    20  
    21  import (
    22  	"context"
    23  	"errors"
    24  	"fmt"
    25  	"net/http"
    26  	"time"
    27  
    28  	"regexp"
    29  
    30  	gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1"
    31  	rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1"
    32  	provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
    33  
    34  	"github.com/cs3org/reva/v2/internal/http/services/archiver/manager"
    35  	"github.com/cs3org/reva/v2/pkg/errtypes"
    36  	"github.com/cs3org/reva/v2/pkg/rgrpc/todo/pool"
    37  	"github.com/cs3org/reva/v2/pkg/rhttp"
    38  	"github.com/cs3org/reva/v2/pkg/rhttp/global"
    39  	"github.com/cs3org/reva/v2/pkg/sharedconf"
    40  	"github.com/cs3org/reva/v2/pkg/storage/utils/downloader"
    41  	"github.com/cs3org/reva/v2/pkg/storage/utils/walker"
    42  	"github.com/cs3org/reva/v2/pkg/storagespace"
    43  	"github.com/gdexlab/go-render/render"
    44  	"github.com/mitchellh/mapstructure"
    45  	"github.com/rs/zerolog"
    46  )
    47  
// svc is the archiver HTTP service. It is built by New and holds everything
// the request handler needs to resolve resources and stream them as an archive.
type svc struct {
	// config is the decoded service configuration (defaults already applied).
	config          *Config
	// gatewaySelector hands out gateway clients used to stat requested paths.
	gatewaySelector pool.Selectable[gateway.GatewayAPIClient]
	// log is the logger handed to New.
	log             *zerolog.Logger
	// walker and downloader are passed to manager.NewArchiver for every request.
	walker          walker.Walker
	downloader      downloader.Downloader

	// allowedFolders holds the compiled Config.AllowedFolders patterns.
	// NOTE(review): only the commented-out filtering code reads this field.
	allowedFolders []*regexp.Regexp
}
    57  
// Config holds the configuration options of the archiver service.
type Config struct {
	// Prefix is the value returned by svc.Prefix; defaults to "download_archive".
	Prefix         string   `mapstructure:"prefix"`
	// GatewaySvc is resolved through sharedconf.GetGatewaySVC and then used to
	// build the gateway selector.
	GatewaySvc     string   `mapstructure:"gatewaysvc"`
	// Timeout is the downloader timeout, expressed in seconds.
	Timeout        int64    `mapstructure:"timeout"`
	// Insecure is forwarded to the downloader via rhttp.Insecure.
	// NOTE(review): presumably disables TLS verification — confirm in rhttp.
	Insecure       bool     `mapstructure:"insecure"`
	// Name is the archive file name without extension; defaults to "download".
	Name           string   `mapstructure:"name"`
	// MaxNumFiles and MaxSize are forwarded unchanged to manager.Config.
	// NOTE(review): presumably upper bounds on the archive content; the unit of
	// MaxSize is not visible here — confirm in the manager package.
	MaxNumFiles    int64    `mapstructure:"max_num_files"`
	MaxSize        int64    `mapstructure:"max_size"`
	// AllowedFolders is a list of regular expressions compiled in New.
	// The filtering that would use them is currently commented out.
	AllowedFolders []string `mapstructure:"allowed_folders"`
}
    69  
// init registers the archiver constructor under the name "archiver" in the
// global HTTP service registry.
func init() {
	global.Register("archiver", New)
}
    73  
    74  // New creates a new archiver service
    75  func New(conf map[string]interface{}, log *zerolog.Logger) (global.Service, error) {
    76  	c := &Config{}
    77  	err := mapstructure.Decode(conf, c)
    78  	if err != nil {
    79  		return nil, err
    80  	}
    81  
    82  	c.init()
    83  
    84  	gatewaySelector, err := pool.GatewaySelector(c.GatewaySvc)
    85  	if err != nil {
    86  		return nil, err
    87  	}
    88  
    89  	// compile all the regex for filtering folders
    90  	allowedFolderRegex := make([]*regexp.Regexp, 0, len(c.AllowedFolders))
    91  	for _, s := range c.AllowedFolders {
    92  		regex, err := regexp.Compile(s)
    93  		if err != nil {
    94  			return nil, err
    95  		}
    96  		allowedFolderRegex = append(allowedFolderRegex, regex)
    97  	}
    98  
    99  	return &svc{
   100  		config:          c,
   101  		gatewaySelector: gatewaySelector,
   102  		downloader:      downloader.NewDownloader(gatewaySelector, rhttp.Insecure(c.Insecure), rhttp.Timeout(time.Duration(c.Timeout*int64(time.Second)))),
   103  		walker:          walker.NewWalker(gatewaySelector),
   104  		log:             log,
   105  		allowedFolders:  allowedFolderRegex,
   106  	}, nil
   107  }
   108  
   109  func (c *Config) init() {
   110  	if c.Prefix == "" {
   111  		c.Prefix = "download_archive"
   112  	}
   113  
   114  	if c.Name == "" {
   115  		c.Name = "download"
   116  	}
   117  
   118  	c.GatewaySvc = sharedconf.GetGatewaySVC(c.GatewaySvc)
   119  }
   120  
   121  func (s *svc) getResources(ctx context.Context, paths, ids []string) ([]*provider.ResourceId, error) {
   122  	if len(paths) == 0 && len(ids) == 0 {
   123  		return nil, errtypes.BadRequest("path and id lists are both empty")
   124  	}
   125  
   126  	resources := make([]*provider.ResourceId, 0, len(paths)+len(ids))
   127  
   128  	for _, id := range ids {
   129  		// id is base64 encoded and after decoding has the form <storage_id>:<resource_id>
   130  
   131  		decodedID, err := storagespace.ParseID(id)
   132  		if err != nil {
   133  			return nil, errors.New("could not unwrap given file id")
   134  		}
   135  
   136  		resources = append(resources, &decodedID)
   137  
   138  	}
   139  
   140  	gatewayClient, err := s.gatewaySelector.Next()
   141  	if err != nil {
   142  		return nil, err
   143  	}
   144  	for _, p := range paths {
   145  		// id is base64 encoded and after decoding has the form <storage_id>:<resource_id>
   146  
   147  		resp, err := gatewayClient.Stat(ctx, &provider.StatRequest{
   148  			Ref: &provider.Reference{
   149  				Path: p,
   150  			},
   151  		})
   152  
   153  		switch {
   154  		case err != nil:
   155  			return nil, err
   156  		case resp.Status.Code == rpc.Code_CODE_NOT_FOUND:
   157  			return nil, errtypes.NotFound(p)
   158  		case resp.Status.Code != rpc.Code_CODE_OK:
   159  			return nil, errtypes.InternalError(fmt.Sprintf("error stating %s", p))
   160  		}
   161  
   162  		resources = append(resources, resp.Info.Id)
   163  
   164  	}
   165  
   166  	// check if all the folders are allowed to be archived
   167  	/* FIXME bring back filtering
   168  	err := s.allAllowed(resources)
   169  	if err != nil {
   170  		return nil, err
   171  	}
   172  	*/
   173  
   174  	return resources, nil
   175  }
   176  
// isPathAllowed returns true if the path matches at least one allowed folder regex
   178  /*
   179  func (s *svc) isPathAllowed(path string) bool {
   180  	for _, reg := range s.allowedFolders {
   181  		if reg.MatchString(path) {
   182  			return true
   183  		}
   184  	}
   185  	return false
   186  }
   187  
// allAllowed returns nil if every path in the slice matches at least one allowed folder regex
   189  func (s *svc) allAllowed(paths []string) error {
   190  	if len(s.allowedFolders) == 0 {
   191  		return nil
   192  	}
   193  
   194  	for _, f := range paths {
   195  		if !s.isPathAllowed(f) {
   196  			return errtypes.BadRequest(fmt.Sprintf("resource at %s not allowed to be archived", f))
   197  		}
   198  	}
   199  	return nil
   200  }
   201  */
   202  
   203  func (s *svc) writeHTTPError(rw http.ResponseWriter, err error) {
   204  	s.log.Error().Msg(err.Error())
   205  
   206  	switch err.(type) {
   207  	case errtypes.NotFound, errtypes.PermissionDenied:
   208  		rw.WriteHeader(http.StatusNotFound)
   209  	case manager.ErrMaxSize, manager.ErrMaxFileCount:
   210  		rw.WriteHeader(http.StatusRequestEntityTooLarge)
   211  	case errtypes.BadRequest:
   212  		rw.WriteHeader(http.StatusBadRequest)
   213  	default:
   214  		rw.WriteHeader(http.StatusInternalServerError)
   215  	}
   216  
   217  	_, _ = rw.Write([]byte(err.Error()))
   218  }
   219  
   220  func (s *svc) Handler() http.Handler {
   221  	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
   222  		// get the paths and/or the resources id from the query
   223  		ctx := r.Context()
   224  		v := r.URL.Query()
   225  
   226  		paths, ok := v["path"]
   227  		if !ok {
   228  			paths = []string{}
   229  		}
   230  		ids, ok := v["id"]
   231  		if !ok {
   232  			ids = []string{}
   233  		}
   234  		format := v.Get("output-format")
   235  		if format == "" {
   236  			format = "zip"
   237  		}
   238  
   239  		resources, err := s.getResources(ctx, paths, ids)
   240  		if err != nil {
   241  			s.writeHTTPError(rw, err)
   242  			return
   243  		}
   244  
   245  		arch, err := manager.NewArchiver(resources, s.walker, s.downloader, manager.Config{
   246  			MaxNumFiles: s.config.MaxNumFiles,
   247  			MaxSize:     s.config.MaxSize,
   248  		})
   249  		if err != nil {
   250  			s.writeHTTPError(rw, err)
   251  			return
   252  		}
   253  
   254  		archName := s.config.Name
   255  		if format == "tar" {
   256  			archName += ".tar"
   257  		} else {
   258  			archName += ".zip"
   259  		}
   260  
   261  		s.log.Debug().Msg("Requested the following resources to archive: " + render.Render(resources))
   262  
   263  		rw.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", archName))
   264  		rw.Header().Set("Content-Transfer-Encoding", "binary")
   265  
   266  		// create the archive
   267  		var closeArchive func()
   268  		if format == "tar" {
   269  			closeArchive, err = arch.CreateTar(ctx, rw)
   270  		} else {
   271  			closeArchive, err = arch.CreateZip(ctx, rw)
   272  		}
   273  		defer closeArchive()
   274  
   275  		if err != nil {
   276  			s.writeHTTPError(rw, err)
   277  			return
   278  		}
   279  
   280  	})
   281  }
   282  
// Prefix returns the configured prefix of the service.
// NOTE(review): presumably the URL path prefix the service is mounted under — confirm against global.Service.
func (s *svc) Prefix() string {
	return s.config.Prefix
}
   286  
// Close is a no-op for the archiver service and always returns nil.
func (s *svc) Close() error {
	return nil
}
   290  
// Unprotected always returns nil for the archiver service.
// NOTE(review): presumably this means no endpoint is exempt from authentication — confirm against global.Service.
func (s *svc) Unprotected() []string {
	return nil
}