github.com/kovansky/hugo@v0.92.3-0.20220224232819-63076e4ff19f/resources/resource_factories/create/remote.go (about)

     1  // Copyright 2021 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package create
    15  
    16  import (
    17  	"bufio"
    18  	"bytes"
    19  	"io"
    20  	"io/ioutil"
    21  	"mime"
    22  	"net/http"
    23  	"net/http/httputil"
    24  	"net/url"
    25  	"path"
    26  	"path/filepath"
    27  	"strings"
    28  
    29  	"github.com/gohugoio/hugo/common/hugio"
    30  	"github.com/gohugoio/hugo/common/types"
    31  	"github.com/gohugoio/hugo/helpers"
    32  	"github.com/gohugoio/hugo/media"
    33  	"github.com/gohugoio/hugo/resources"
    34  	"github.com/gohugoio/hugo/resources/resource"
    35  	"github.com/mitchellh/mapstructure"
    36  	"github.com/pkg/errors"
    37  )
    38  
    39  // FromRemote expects one or n-parts of a URL to a resource
    40  // If you provide multiple parts they will be joined together to the final URL.
    41  func (c *Client) FromRemote(uri string, optionsm map[string]interface{}) (resource.Resource, error) {
    42  	rURL, err := url.Parse(uri)
    43  	if err != nil {
    44  		return nil, errors.Wrapf(err, "failed to parse URL for resource %s", uri)
    45  	}
    46  
    47  	resourceID := helpers.HashString(uri, optionsm)
    48  
    49  	_, httpResponse, err := c.cacheGetResource.GetOrCreate(resourceID, func() (io.ReadCloser, error) {
    50  		options, err := decodeRemoteOptions(optionsm)
    51  		if err != nil {
    52  			return nil, errors.Wrapf(err, "failed to decode options for resource %s", uri)
    53  		}
    54  		if err := c.validateFromRemoteArgs(uri, options); err != nil {
    55  			return nil, err
    56  		}
    57  
    58  		req, err := http.NewRequest(options.Method, uri, options.BodyReader())
    59  		if err != nil {
    60  			return nil, errors.Wrapf(err, "failed to create request for resource %s", uri)
    61  		}
    62  		addDefaultHeaders(req)
    63  
    64  		if options.Headers != nil {
    65  			addUserProvidedHeaders(options.Headers, req)
    66  		}
    67  
    68  		res, err := c.httpClient.Do(req)
    69  		if err != nil {
    70  			return nil, err
    71  		}
    72  
    73  		if res.StatusCode != http.StatusNotFound {
    74  			if res.StatusCode < 200 || res.StatusCode > 299 {
    75  				return nil, errors.Errorf("failed to fetch remote resource: %s", http.StatusText(res.StatusCode))
    76  			}
    77  		}
    78  
    79  		httpResponse, err := httputil.DumpResponse(res, true)
    80  		if err != nil {
    81  			return nil, err
    82  		}
    83  
    84  		return hugio.ToReadCloser(bytes.NewReader(httpResponse)), nil
    85  	})
    86  	if err != nil {
    87  		return nil, err
    88  	}
    89  	defer httpResponse.Close()
    90  
    91  	res, err := http.ReadResponse(bufio.NewReader(httpResponse), nil)
    92  	if err != nil {
    93  		return nil, err
    94  	}
    95  
    96  	if res.StatusCode == http.StatusNotFound {
    97  		// Not found. This matches how looksup for local resources work.
    98  		return nil, nil
    99  	}
   100  
   101  	body, err := ioutil.ReadAll(res.Body)
   102  	if err != nil {
   103  		return nil, errors.Wrapf(err, "failed to read remote resource %q", uri)
   104  	}
   105  
   106  	filename := path.Base(rURL.Path)
   107  	if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil {
   108  		if _, ok := params["filename"]; ok {
   109  			filename = params["filename"]
   110  		}
   111  	}
   112  
   113  	var extensionHints []string
   114  
   115  	contentType := res.Header.Get("Content-Type")
   116  
   117  	// mime.ExtensionsByType gives a long list of extensions for text/plain,
   118  	// just use ".txt".
   119  	if strings.HasPrefix(contentType, "text/plain") {
   120  		extensionHints = []string{".txt"}
   121  	} else {
   122  		exts, _ := mime.ExtensionsByType(contentType)
   123  		if exts != nil {
   124  			extensionHints = exts
   125  		}
   126  	}
   127  
   128  	// Look for a file extention. If it's .txt, look for a more specific.
   129  	if extensionHints == nil || extensionHints[0] == ".txt" {
   130  		if ext := path.Ext(filename); ext != "" {
   131  			extensionHints = []string{ext}
   132  		}
   133  	}
   134  
   135  	// Now resolve the media type primarily using the content.
   136  	mediaType := media.FromContent(c.rs.MediaTypes, extensionHints, body)
   137  	if mediaType.IsZero() {
   138  		return nil, errors.Errorf("failed to resolve media type for remote resource %q", uri)
   139  	}
   140  
   141  	resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + mediaType.FirstSuffix.FullSuffix
   142  
   143  	return c.rs.New(
   144  		resources.ResourceSourceDescriptor{
   145  			MediaType:   mediaType,
   146  			LazyPublish: true,
   147  			OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) {
   148  				return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil
   149  			},
   150  			RelTargetFilename: filepath.Clean(resourceID),
   151  		})
   152  }
   153  
   154  func (c *Client) validateFromRemoteArgs(uri string, options fromRemoteOptions) error {
   155  	if err := c.rs.ExecHelper.Sec().CheckAllowedHTTPURL(uri); err != nil {
   156  		return err
   157  	}
   158  
   159  	if err := c.rs.ExecHelper.Sec().CheckAllowedHTTPMethod(options.Method); err != nil {
   160  		return err
   161  	}
   162  
   163  	return nil
   164  }
   165  
   166  func addDefaultHeaders(req *http.Request, accepts ...string) {
   167  	for _, accept := range accepts {
   168  		if !hasHeaderValue(req.Header, "Accept", accept) {
   169  			req.Header.Add("Accept", accept)
   170  		}
   171  	}
   172  	if !hasHeaderKey(req.Header, "User-Agent") {
   173  		req.Header.Add("User-Agent", "Hugo Static Site Generator")
   174  	}
   175  }
   176  
   177  func addUserProvidedHeaders(headers map[string]interface{}, req *http.Request) {
   178  	if headers == nil {
   179  		return
   180  	}
   181  	for key, val := range headers {
   182  		vals := types.ToStringSlicePreserveString(val)
   183  		for _, s := range vals {
   184  			req.Header.Add(key, s)
   185  		}
   186  	}
   187  }
   188  
   189  func hasHeaderValue(m http.Header, key, value string) bool {
   190  	var s []string
   191  	var ok bool
   192  
   193  	if s, ok = m[key]; !ok {
   194  		return false
   195  	}
   196  
   197  	for _, v := range s {
   198  		if v == value {
   199  			return true
   200  		}
   201  	}
   202  	return false
   203  }
   204  
   205  func hasHeaderKey(m http.Header, key string) bool {
   206  	_, ok := m[key]
   207  	return ok
   208  }
   209  
   210  type fromRemoteOptions struct {
   211  	Method  string
   212  	Headers map[string]interface{}
   213  	Body    []byte
   214  }
   215  
   216  func (o fromRemoteOptions) BodyReader() io.Reader {
   217  	if o.Body == nil {
   218  		return nil
   219  	}
   220  	return bytes.NewBuffer(o.Body)
   221  }
   222  
   223  func decodeRemoteOptions(optionsm map[string]interface{}) (fromRemoteOptions, error) {
   224  	options := fromRemoteOptions{
   225  		Method: "GET",
   226  	}
   227  
   228  	err := mapstructure.WeakDecode(optionsm, &options)
   229  	if err != nil {
   230  		return options, err
   231  	}
   232  	options.Method = strings.ToUpper(options.Method)
   233  
   234  	return options, nil
   235  }