github.com/cozy/cozy-stack@v0.0.0-20240603063001-31110fa4cae1/model/remote/remote.go (about)

     1  package remote
     2  
     3  import (
     4  	"encoding/json"
     5  	"errors"
     6  	"fmt"
     7  	"html"
     8  	"io"
     9  	"mime"
    10  	"net/http"
    11  	"net/url"
    12  	"os"
    13  	"path"
    14  	"regexp"
    15  	"runtime"
    16  	"strings"
    17  	"time"
    18  
    19  	"github.com/cozy/cozy-stack/model/instance"
    20  	build "github.com/cozy/cozy-stack/pkg/config"
    21  	"github.com/cozy/cozy-stack/pkg/config/config"
    22  	"github.com/cozy/cozy-stack/pkg/consts"
    23  	"github.com/cozy/cozy-stack/pkg/couchdb"
    24  	"github.com/cozy/cozy-stack/pkg/filetype"
    25  	"github.com/cozy/cozy-stack/pkg/logger"
    26  	"github.com/cozy/cozy-stack/pkg/prefixer"
    27  	"github.com/cozy/httpcache"
    28  	"github.com/labstack/echo/v4"
    29  )
    30  
var (
	// ErrNotFoundRemote is used when no request is defined for a doctype.
	ErrNotFoundRemote = errors.New("the doctype has no request defined")
	// ErrInvalidRequest is used when we can't use the request defined by the
	// developer.
	ErrInvalidRequest = errors.New("the request is not valid")
	// ErrRequestFailed is used when the connection to the remote website can't
	// be established.
	ErrRequestFailed = errors.New("can't connect to the remote host")
	// ErrInvalidVariables is used when the variables can't be extracted from
	// the request.
	ErrInvalidVariables = errors.New("the variables are not valid")
	// ErrMissingVar is used when trying to use a variable that has not been
	// defined.
	ErrMissingVar = errors.New("a variable is used in the template, but no value was given")
	// ErrInvalidContentType is used when the response has a content-type that
	// we deny for security reasons.
	ErrInvalidContentType = errors.New("the content-type for the response is not authorized")
	// ErrRemoteAssetNotFound is used when the wanted remote asset is not part of
	// our defined list.
	ErrRemoteAssetNotFound = errors.New("wanted remote asset is not part of our asset list")
)
    52  
// rawURL is the fmt template of the URL from which Find downloads the request
// definition of a doctype; the %s verb is replaced by the doctype name.
const rawURL = "https://raw.githubusercontent.com/cozy/cozy-doctypes/master/%s/request"
    54  
// remoteClient is the HTTP client shared by Find (to download request
// definitions) and ProxyTo (to call the remote websites). Requests are
// abandoned after 20 seconds.
var remoteClient = &http.Client{
	Timeout: 20 * time.Second,
}
    58  
// assetsClient is the HTTP client used by ProxyRemoteAsset. Requests are
// abandoned after 20 seconds and responses go through an in-memory httpcache
// transport.
// NOTE(review): 32 is presumably the size of the memory cache — confirm
// against the cozy/httpcache documentation.
var assetsClient = &http.Client{
	Timeout:   20 * time.Second,
	Transport: httpcache.NewMemoryCacheTransport(32),
}
    63  
    64  // Doctype is used to describe a doctype, its request for a remote doctype for example
    65  type Doctype struct {
    66  	DocID     string    `json:"_id,omitempty"`
    67  	DocRev    string    `json:"_rev,omitempty"`
    68  	Request   string    `json:"request"`
    69  	UpdatedAt time.Time `json:"updated_at"`
    70  }
    71  
    72  // ID is used to implement the couchdb.Doc interface
    73  func (d *Doctype) ID() string { return d.DocID }
    74  
    75  // Rev is used to implement the couchdb.Doc interface
    76  func (d *Doctype) Rev() string { return d.DocRev }
    77  
    78  // SetID is used to implement the couchdb.Doc interface
    79  func (d *Doctype) SetID(id string) { d.DocID = id }
    80  
    81  // SetRev is used to implement the couchdb.Doc interface
    82  func (d *Doctype) SetRev(rev string) { d.DocRev = rev }
    83  
    84  // DocType implements couchdb.Doc
    85  func (d *Doctype) DocType() string { return consts.Doctypes }
    86  
    87  // Clone implements couchdb.Doc
    88  func (d *Doctype) Clone() couchdb.Doc { cloned := *d; return &cloned }
    89  
    90  // Request is used to log in couchdb a call to a remote website
    91  type Request struct {
    92  	DocID         string            `json:"_id,omitempty"`
    93  	DocRev        string            `json:"_rev,omitempty"`
    94  	RemoteDoctype string            `json:"doctype"`
    95  	Verb          string            `json:"verb"`
    96  	URL           string            `json:"url"`
    97  	ResponseCode  int               `json:"response_code"`
    98  	ContentType   string            `json:"content_type"`
    99  	Variables     map[string]string `json:"variables"`
   100  	CreatedAt     time.Time         `json:"created_at"`
   101  	CozyMetadata  CozyMetadata      `json:"cozyMetadata"`
   102  }
   103  
   104  type CozyMetadata struct {
   105  	CreatedByApp string `json:"createdByApp,omitempty"`
   106  }
   107  
   108  // ID is used to implement the couchdb.Doc interface
   109  func (r *Request) ID() string { return r.DocID }
   110  
   111  // Rev is used to implement the couchdb.Doc interface
   112  func (r *Request) Rev() string { return r.DocRev }
   113  
   114  // SetID is used to implement the couchdb.Doc interface
   115  func (r *Request) SetID(id string) { r.DocID = id }
   116  
   117  // SetRev is used to implement the couchdb.Doc interface
   118  func (r *Request) SetRev(rev string) { r.DocRev = rev }
   119  
   120  // DocType implements couchdb.Doc
   121  func (r *Request) DocType() string { return consts.RemoteRequests }
   122  
   123  // Clone implements couchdb.Doc
   124  func (r *Request) Clone() couchdb.Doc {
   125  	cloned := *r
   126  	cloned.Variables = make(map[string]string)
   127  	for k, v := range r.Variables {
   128  		cloned.Variables[k] = v
   129  	}
   130  	return &cloned
   131  }
   132  
// Remote is the struct used to call a remote website for a doctype. It is
// built by ParseRawRequest from the raw request definition, and its URL,
// Headers and Body may contain {{variable}} placeholders until ProxyTo
// replaces them.
type Remote struct {
	Doctype string            // name of the remote doctype
	Verb    string            // HTTP method; ParseRawRequest only accepts GET and POST
	URL     *url.URL          // http or https URL of the remote website
	Headers map[string]string // headers to send, keyed by header name
	Body    string            // request body, only filled for a POST
}
   141  
// log is the logger of this package, tagged with the "remote" namespace.
var log = logger.WithNamespace("remote")
   143  
   144  // ParseRawRequest takes a string and parse it as a remote struct.
   145  // First line is verb and URL.
   146  // Then, we have the headers.
   147  // And for a POST, we have a blank line, and then the body.
   148  func ParseRawRequest(doctype, raw string) (*Remote, error) {
   149  	lines := strings.Split(raw, "\n")
   150  	parts := strings.SplitN(lines[0], " ", 2)
   151  	if len(parts) != 2 {
   152  		log.Infof("%s cannot be used as a remote doctype", doctype)
   153  		return nil, ErrInvalidRequest
   154  	}
   155  	remote := Remote{Doctype: doctype}
   156  	remote.Verb = parts[0]
   157  	if remote.Verb != echo.GET && remote.Verb != echo.POST {
   158  		log.Infof("Invalid verb for remote doctype %s: %s", doctype, remote.Verb)
   159  		return nil, ErrInvalidRequest
   160  	}
   161  	u, err := url.Parse(parts[1])
   162  	if err != nil {
   163  		log.Infof("Invalid URL for remote doctype %s: %s", doctype, parts[1])
   164  		return nil, ErrInvalidRequest
   165  	}
   166  	if u.Scheme != "https" && u.Scheme != "http" {
   167  		log.Infof("Invalid scheme for remote doctype %s: %s", doctype, u.Scheme)
   168  		return nil, ErrInvalidRequest
   169  	}
   170  	remote.URL = u
   171  	remote.Headers = make(map[string]string)
   172  	for i, line := range lines[1:] {
   173  		if line == "" {
   174  			if remote.Verb == echo.GET {
   175  				continue
   176  			}
   177  			remote.Body = strings.Join(lines[i+2:], "\n")
   178  			break
   179  		}
   180  		parts = strings.SplitN(line, ":", 2)
   181  		if len(parts) != 2 {
   182  			log.Infof("Invalid header for remote doctype %s: %s", doctype, line)
   183  			return nil, ErrInvalidRequest
   184  		}
   185  		remote.Headers[parts[0]] = strings.TrimSpace(parts[1])
   186  	}
   187  	return &remote, nil
   188  }
   189  
   190  func lockDoctype(inst *instance.Instance, docID string) func() {
   191  	mu := config.Lock().ReadWrite(inst, docID)
   192  	_ = mu.Lock()
   193  	return mu.Unlock
   194  }
   195  
   196  // Find finds the request defined for the given doctype
   197  func Find(ins *instance.Instance, doctype string) (*Remote, error) {
   198  	var raw string
   199  
   200  	if config.GetConfig().Doctypes == "" {
   201  		dt := Doctype{
   202  			DocID: consts.Doctypes + "/" + doctype,
   203  		}
   204  		defer lockDoctype(ins, dt.DocID)()
   205  		err := couchdb.GetDoc(ins, consts.Doctypes, dt.DocID, &dt)
   206  		if err != nil || dt.UpdatedAt.Add(24*time.Hour).Before(time.Now()) {
   207  			rev := dt.Rev()
   208  			u := fmt.Sprintf(rawURL, doctype)
   209  			req, err := http.NewRequest(http.MethodGet, u, nil)
   210  			if err != nil {
   211  				return nil, err
   212  			}
   213  			log.Debugf("Fetch remote doctype from %s\n", doctype)
   214  			res, err := remoteClient.Do(req)
   215  			if err != nil {
   216  				log.Infof("Request not found for remote doctype %s: %s", doctype, err)
   217  				return nil, ErrNotFoundRemote
   218  			}
   219  			defer res.Body.Close()
   220  			b, err := io.ReadAll(res.Body)
   221  			if err != nil {
   222  				log.Infof("Request not found for remote doctype %s: %s", doctype, err)
   223  				return nil, ErrNotFoundRemote
   224  			}
   225  			dt.Request = string(b)
   226  			dt.UpdatedAt = time.Now()
   227  			if rev == "" {
   228  				err = couchdb.CreateNamedDocWithDB(ins, &dt)
   229  			} else {
   230  				dt.SetRev(rev)
   231  				err = couchdb.UpdateDoc(ins, &dt)
   232  			}
   233  			if err != nil {
   234  				log.Infof("Cannot save remote doctype %s: %s", doctype, err)
   235  			}
   236  		}
   237  		raw = dt.Request
   238  	} else {
   239  		filename := path.Join(config.GetConfig().Doctypes, doctype, "request")
   240  		bytes, err := os.ReadFile(filename)
   241  		if err != nil {
   242  			log.Infof("Cannot read file %s: %s", filename, err)
   243  			return nil, ErrNotFoundRemote
   244  		}
   245  		raw = string(bytes)
   246  	}
   247  
   248  	return ParseRawRequest(doctype, raw)
   249  }
   250  
   251  // extractVariables extracts the variables:
   252  // - from the query string for a GET
   253  // - from the body formatted as JSON for a POST
   254  func extractVariables(verb string, in *http.Request) (map[string]string, error) {
   255  	vars := make(map[string]string)
   256  	if verb == echo.GET {
   257  		for k, v := range in.URL.Query() {
   258  			vars[k] = v[0]
   259  		}
   260  	} else {
   261  		err := json.NewDecoder(in.Body).Decode(&vars)
   262  		if err != nil {
   263  			return nil, err
   264  		}
   265  	}
   266  	return vars, nil
   267  }
   268  
   269  func findSecret(doctype, secretName string) (string, bool) {
   270  	var doc couchdb.JSONDoc
   271  	err := couchdb.GetDoc(prefixer.SecretsPrefixer, consts.RemoteSecrets, doctype, &doc)
   272  	if err != nil {
   273  		return "", false
   274  	}
   275  	secret, ok := doc.M[secretName].(string)
   276  	return secret, ok
   277  }
   278  
// injectionRegexp matches the placeholders replaced by injectVar: a non-empty
// run of ASCII letters, digits, underscores and spaces between {{ and }}.
var injectionRegexp = regexp.MustCompile(`{{[0-9A-Za-z_ ]+}}`)
   280  
   281  func injectVar(src string, vars map[string]string, defautFunc, doctype string) (string, error) {
   282  	var err error
   283  	result := injectionRegexp.ReplaceAllStringFunc(src, func(m string) string {
   284  		m = strings.TrimSpace(m[2 : len(m)-2])
   285  
   286  		var funname string
   287  		var varname string
   288  		if defautFunc == "" {
   289  			ms := strings.SplitN(m, " ", 2)
   290  			if len(ms) == 1 {
   291  				varname = ms[0]
   292  			} else {
   293  				funname = ms[0]
   294  				varname = ms[1]
   295  			}
   296  		} else {
   297  			varname = m
   298  			funname = defautFunc
   299  		}
   300  
   301  		val, ok := vars[varname]
   302  		if !ok && strings.HasPrefix(varname, "secret_") {
   303  			val, ok = findSecret(doctype, strings.TrimPrefix(varname, "secret_"))
   304  		}
   305  		if !ok {
   306  			err = ErrMissingVar
   307  			return ""
   308  		}
   309  
   310  		switch funname {
   311  		case "":
   312  			return val
   313  		case "query":
   314  			return url.QueryEscape(val)
   315  		case "path":
   316  			return url.PathEscape(val)
   317  		case "header":
   318  			return strings.ReplaceAll(val, "\n", "\\n")
   319  		case "json":
   320  			var b []byte
   321  			b, err = json.Marshal(val)
   322  			if err != nil {
   323  				return ""
   324  			}
   325  			return string(b[1 : len(b)-1])
   326  		case "html":
   327  			return html.EscapeString(val)
   328  		default:
   329  			err = fmt.Errorf("remote: unknown template function %s", funname)
   330  			return ""
   331  		}
   332  	})
   333  	return result, err
   334  }
   335  
   336  // injectVariables replaces {{variable}} by its value in some fields of the
   337  // remote struct
   338  func injectVariables(remote *Remote, vars map[string]string) error {
   339  	var err error
   340  	if strings.Contains(remote.URL.Path, "{{") {
   341  		remote.URL.Path, err = injectVar(remote.URL.Path, vars, "path", remote.Doctype)
   342  		if err != nil {
   343  			return err
   344  		}
   345  	}
   346  	if strings.Contains(remote.URL.RawQuery, "{{") {
   347  		remote.URL.RawQuery, err = injectVar(remote.URL.RawQuery, vars, "query", remote.Doctype)
   348  		if err != nil {
   349  			return err
   350  		}
   351  	}
   352  	for k, v := range remote.Headers {
   353  		if strings.Contains(v, "{{") {
   354  			remote.Headers[k], err = injectVar(v, vars, "header", remote.Doctype)
   355  			if err != nil {
   356  				return err
   357  			}
   358  		}
   359  	}
   360  	if strings.Contains(remote.Body, "{{") {
   361  		remote.Body, err = injectVar(remote.Body, vars, "", remote.Doctype)
   362  	}
   363  	return err
   364  }
   365  
   366  // ProxyTo calls the external website and proxy the response
   367  func (remote *Remote) ProxyTo(
   368  	ins *instance.Instance,
   369  	rw http.ResponseWriter,
   370  	in *http.Request,
   371  	slug string,
   372  ) error {
   373  	vars, err := extractVariables(remote.Verb, in)
   374  	if err != nil {
   375  		log.Infof("Error on extracting variables: %s", err)
   376  		return ErrInvalidVariables
   377  	}
   378  	if err = injectVariables(remote, vars); err != nil {
   379  		return err
   380  	}
   381  
   382  	// Sanitize the remote URL
   383  	if !config.GetConfig().RemoteAllowCustomPort {
   384  		if strings.Contains(remote.URL.Host, ":") {
   385  			log.Infof("Invalid host for remote doctype %s: %s", remote.Doctype, remote.URL.Host)
   386  			return ErrInvalidRequest
   387  		}
   388  	}
   389  	remote.URL.User = nil
   390  	remote.URL.Fragment = ""
   391  
   392  	var body io.Reader
   393  	if remote.Verb != "GET" && remote.Verb != "DELETE" {
   394  		body = strings.NewReader(remote.Body)
   395  	}
   396  	req, err := http.NewRequest(remote.Verb, remote.URL.String(), body)
   397  	if err != nil {
   398  		return ErrInvalidRequest
   399  	}
   400  
   401  	req.Header.Set("User-Agent", "cozy-stack "+build.Version+" ("+runtime.Version()+")")
   402  	for k, v := range remote.Headers {
   403  		req.Header.Set(k, v)
   404  	}
   405  
   406  	res, err := remoteClient.Do(req)
   407  	if err != nil {
   408  		log.Infof("Error on request %s: %s", remote.URL.String(), err)
   409  		return ErrRequestFailed
   410  	}
   411  	defer res.Body.Close()
   412  
   413  	ctype, _, err := mime.ParseMediaType(res.Header.Get(echo.HeaderContentType))
   414  	if err != nil {
   415  		log.Infof("request %s has an invalid content-type", remote.URL.String())
   416  		return ErrInvalidContentType
   417  	}
   418  	if ctype != "application/json" &&
   419  		ctype != "text/xml" &&
   420  		ctype != "text/plain" &&
   421  		ctype != "application/xml" &&
   422  		ctype != "application/vnd.api+json" &&
   423  		ctype != "application/sparql-results+json" {
   424  		class := strings.SplitN(ctype, "/", 2)[0]
   425  		if class != "image" && class != "audio" && class != "video" {
   426  			log.Infof("request %s has a content-type that is not allowed: %s",
   427  				remote.URL.String(), ctype)
   428  			return ErrInvalidContentType
   429  		}
   430  	}
   431  
   432  	logged := &Request{
   433  		RemoteDoctype: remote.Doctype,
   434  		Verb:          remote.Verb,
   435  		URL:           remote.URL.String(),
   436  		ResponseCode:  res.StatusCode,
   437  		ContentType:   ctype,
   438  		Variables:     vars,
   439  		CreatedAt:     time.Now(),
   440  		CozyMetadata:  CozyMetadata{CreatedByApp: slug},
   441  	}
   442  	err = couchdb.CreateDoc(ins, logged)
   443  	if err != nil {
   444  		log.Errorf("Can't save remote request: %s", err)
   445  	}
   446  	log.Debugf("Remote request: %#v\n", logged)
   447  
   448  	copyHeader(rw.Header(), res.Header)
   449  	rw.WriteHeader(res.StatusCode)
   450  	_, err = io.Copy(rw, res.Body)
   451  	if err != nil {
   452  		log.Infof("Error on copying response from %s: %s", remote.URL.String(), err)
   453  	}
   454  	return nil
   455  }
   456  
   457  // ProxyRemoteAsset proxy the given http request to fetch an asset from our
   458  // list of available asset list.
   459  func ProxyRemoteAsset(name string, w http.ResponseWriter) error {
   460  	assetURL, ok := config.GetConfig().RemoteAssets[name]
   461  	if !ok {
   462  		return ErrRemoteAssetNotFound
   463  	}
   464  
   465  	if build.IsDevRelease() && strings.HasPrefix(assetURL, "file:") {
   466  		return serveLocalRemoteAsset(assetURL, w)
   467  	}
   468  
   469  	req, err := http.NewRequest(http.MethodGet, assetURL, nil)
   470  	if err != nil {
   471  		return err
   472  	}
   473  	req.Header.Set("User-Agent",
   474  		"cozy-stack "+build.Version+" ("+runtime.Version()+")")
   475  
   476  	res, err := assetsClient.Do(req)
   477  	if err != nil {
   478  		return err
   479  	}
   480  	defer res.Body.Close()
   481  
   482  	copyHeader(w.Header(), res.Header)
   483  	w.WriteHeader(res.StatusCode)
   484  
   485  	_, err = io.Copy(w, res.Body)
   486  	return err
   487  }
   488  
   489  func serveLocalRemoteAsset(assetURL string, w http.ResponseWriter) error {
   490  	u, err := url.Parse(assetURL)
   491  	if err != nil {
   492  		return err
   493  	}
   494  
   495  	f, err := os.Open(u.Path)
   496  	if err != nil {
   497  		return err
   498  	}
   499  	defer f.Close()
   500  
   501  	ext := path.Ext(u.Path)
   502  	mimetype := filetype.ByExtension(ext)
   503  	w.Header().Set(echo.HeaderContentType, mimetype)
   504  	w.WriteHeader(http.StatusOK)
   505  
   506  	_, err = io.Copy(w, f)
   507  	return err
   508  }
   509  
// doNotCopyHeaders lists the response headers that copyHeader never forwards
// from the remote website: cookies, CORS headers, content security policies,
// strict transport security and frame options.
var doNotCopyHeaders = []string{
	echo.HeaderSetCookie,
	echo.HeaderAccessControlAllowOrigin,
	echo.HeaderAccessControlAllowMethods,
	echo.HeaderAccessControlAllowCredentials,
	echo.HeaderAccessControlAllowHeaders,
	echo.HeaderAccessControlMaxAge,
	echo.HeaderContentSecurityPolicy,
	echo.HeaderContentSecurityPolicyReportOnly,
	echo.HeaderStrictTransportSecurity,
	echo.HeaderXFrameOptions,
}
   522  
   523  func copyHeader(dst, src http.Header) {
   524  	for k, vv := range src {
   525  		copy := true
   526  		for _, h := range doNotCopyHeaders {
   527  			if k == h {
   528  				copy = false
   529  				break
   530  			}
   531  		}
   532  		if copy {
   533  			for _, v := range vv {
   534  				dst.Add(k, v)
   535  			}
   536  		}
   537  	}
   538  }
   539  
// Compile-time checks that *Doctype and *Request implement couchdb.Doc.
var (
	_ couchdb.Doc = (*Doctype)(nil)
	_ couchdb.Doc = (*Request)(nil)
)