github.com/linchen2chris/hugo@v0.0.0-20230307053224-cec209389705/resources/resource_factories/create/remote.go (about)

     1  // Copyright 2021 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package create
    15  
    16  import (
    17  	"bufio"
    18  	"bytes"
    19  	"fmt"
    20  	"io"
    21  	"mime"
    22  	"net/http"
    23  	"net/http/httputil"
    24  	"net/url"
    25  	"path"
    26  	"path/filepath"
    27  	"strings"
    28  
    29  	"github.com/gohugoio/hugo/common/hugio"
    30  	"github.com/gohugoio/hugo/common/maps"
    31  	"github.com/gohugoio/hugo/common/types"
    32  	"github.com/gohugoio/hugo/identity"
    33  	"github.com/gohugoio/hugo/media"
    34  	"github.com/gohugoio/hugo/resources"
    35  	"github.com/gohugoio/hugo/resources/resource"
    36  	"github.com/mitchellh/mapstructure"
    37  )
    38  
    39  type HTTPError struct {
    40  	error
    41  	Data map[string]any
    42  
    43  	StatusCode int
    44  	Body       string
    45  }
    46  
    47  func responseToData(res *http.Response, readBody bool) map[string]any {
    48  	var body []byte
    49  	if readBody {
    50  		body, _ = io.ReadAll(res.Body)
    51  	}
    52  
    53  	m := map[string]any{
    54  		"StatusCode":       res.StatusCode,
    55  		"Status":           res.Status,
    56  		"TransferEncoding": res.TransferEncoding,
    57  		"ContentLength":    res.ContentLength,
    58  		"ContentType":      res.Header.Get("Content-Type"),
    59  	}
    60  
    61  	if readBody {
    62  		m["Body"] = string(body)
    63  	}
    64  
    65  	return m
    66  
    67  }
    68  
    69  func toHTTPError(err error, res *http.Response, readBody bool) *HTTPError {
    70  	if err == nil {
    71  		panic("err is nil")
    72  	}
    73  	if res == nil {
    74  		return &HTTPError{
    75  			error: err,
    76  			Data:  map[string]any{},
    77  		}
    78  	}
    79  
    80  	return &HTTPError{
    81  		error: err,
    82  		Data:  responseToData(res, readBody),
    83  	}
    84  }
    85  
    86  // FromRemote expects one or n-parts of a URL to a resource
    87  // If you provide multiple parts they will be joined together to the final URL.
    88  func (c *Client) FromRemote(uri string, optionsm map[string]any) (resource.Resource, error) {
    89  	rURL, err := url.Parse(uri)
    90  	if err != nil {
    91  		return nil, fmt.Errorf("failed to parse URL for resource %s: %w", uri, err)
    92  	}
    93  
    94  	method := "GET"
    95  	if s, ok := maps.LookupEqualFold(optionsm, "method"); ok {
    96  		method = strings.ToUpper(s.(string))
    97  	}
    98  	isHeadMethod := method == "HEAD"
    99  
   100  	resourceID := calculateResourceID(uri, optionsm)
   101  
   102  	_, httpResponse, err := c.cacheGetResource.GetOrCreate(resourceID, func() (io.ReadCloser, error) {
   103  		options, err := decodeRemoteOptions(optionsm)
   104  		if err != nil {
   105  			return nil, fmt.Errorf("failed to decode options for resource %s: %w", uri, err)
   106  		}
   107  		if err := c.validateFromRemoteArgs(uri, options); err != nil {
   108  			return nil, err
   109  		}
   110  
   111  		req, err := options.NewRequest(uri)
   112  		if err != nil {
   113  			return nil, fmt.Errorf("failed to create request for resource %s: %w", uri, err)
   114  		}
   115  
   116  		res, err := c.httpClient.Do(req)
   117  		if err != nil {
   118  			return nil, err
   119  		}
   120  		defer res.Body.Close()
   121  
   122  		httpResponse, err := httputil.DumpResponse(res, true)
   123  		if err != nil {
   124  			return nil, toHTTPError(err, res, !isHeadMethod)
   125  		}
   126  
   127  		if res.StatusCode != http.StatusNotFound {
   128  			if res.StatusCode < 200 || res.StatusCode > 299 {
   129  				return nil, toHTTPError(fmt.Errorf("failed to fetch remote resource: %s", http.StatusText(res.StatusCode)), res, !isHeadMethod)
   130  
   131  			}
   132  		}
   133  
   134  		return hugio.ToReadCloser(bytes.NewReader(httpResponse)), nil
   135  	})
   136  	if err != nil {
   137  		return nil, err
   138  	}
   139  	defer httpResponse.Close()
   140  
   141  	res, err := http.ReadResponse(bufio.NewReader(httpResponse), nil)
   142  	if err != nil {
   143  		return nil, err
   144  	}
   145  	defer res.Body.Close()
   146  
   147  	if res.StatusCode == http.StatusNotFound {
   148  		// Not found. This matches how looksup for local resources work.
   149  		return nil, nil
   150  	}
   151  
   152  	var (
   153  		body      []byte
   154  		mediaType media.Type
   155  	)
   156  	// A response to a HEAD method should not have a body. If it has one anyway, that body must be ignored.
   157  	// See https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/HEAD
   158  	if !isHeadMethod && res.Body != nil {
   159  		body, err = io.ReadAll(res.Body)
   160  		if err != nil {
   161  			return nil, fmt.Errorf("failed to read remote resource %q: %w", uri, err)
   162  		}
   163  	}
   164  
   165  	filename := path.Base(rURL.Path)
   166  	if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil {
   167  		if _, ok := params["filename"]; ok {
   168  			filename = params["filename"]
   169  		}
   170  	}
   171  
   172  	contentType := res.Header.Get("Content-Type")
   173  
   174  	if isHeadMethod {
   175  		// We have no body to work with, so we need to use the Content-Type header.
   176  		mediaType, _ = media.FromString(contentType)
   177  	} else {
   178  
   179  		var extensionHints []string
   180  
   181  		// mime.ExtensionsByType gives a long list of extensions for text/plain,
   182  		// just use ".txt".
   183  		if strings.HasPrefix(contentType, "text/plain") {
   184  			extensionHints = []string{".txt"}
   185  		} else {
   186  			exts, _ := mime.ExtensionsByType(contentType)
   187  			if exts != nil {
   188  				extensionHints = exts
   189  			}
   190  		}
   191  
   192  		// Look for a file extension. If it's .txt, look for a more specific.
   193  		if extensionHints == nil || extensionHints[0] == ".txt" {
   194  			if ext := path.Ext(filename); ext != "" {
   195  				extensionHints = []string{ext}
   196  			}
   197  		}
   198  
   199  		// Now resolve the media type primarily using the content.
   200  		mediaType = media.FromContent(c.rs.MediaTypes, extensionHints, body)
   201  
   202  	}
   203  
   204  	if mediaType.IsZero() {
   205  		return nil, fmt.Errorf("failed to resolve media type for remote resource %q", uri)
   206  	}
   207  
   208  	resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + mediaType.FirstSuffix.FullSuffix
   209  	data := responseToData(res, false)
   210  
   211  	return c.rs.New(
   212  		resources.ResourceSourceDescriptor{
   213  			MediaType:   mediaType,
   214  			Data:        data,
   215  			LazyPublish: true,
   216  			OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) {
   217  				return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil
   218  			},
   219  			RelTargetFilename: filepath.Clean(resourceID),
   220  		})
   221  }
   222  
   223  func (c *Client) validateFromRemoteArgs(uri string, options fromRemoteOptions) error {
   224  	if err := c.rs.ExecHelper.Sec().CheckAllowedHTTPURL(uri); err != nil {
   225  		return err
   226  	}
   227  
   228  	if err := c.rs.ExecHelper.Sec().CheckAllowedHTTPMethod(options.Method); err != nil {
   229  		return err
   230  	}
   231  
   232  	return nil
   233  }
   234  
   235  func calculateResourceID(uri string, optionsm map[string]any) string {
   236  	if key, found := maps.LookupEqualFold(optionsm, "key"); found {
   237  		return identity.HashString(key)
   238  	}
   239  	return identity.HashString(uri, optionsm)
   240  }
   241  
   242  func addDefaultHeaders(req *http.Request) {
   243  	if !hasHeaderKey(req.Header, "User-Agent") {
   244  		req.Header.Add("User-Agent", "Hugo Static Site Generator")
   245  	}
   246  }
   247  
   248  func addUserProvidedHeaders(headers map[string]any, req *http.Request) {
   249  	if headers == nil {
   250  		return
   251  	}
   252  	for key, val := range headers {
   253  		vals := types.ToStringSlicePreserveString(val)
   254  		for _, s := range vals {
   255  			req.Header.Add(key, s)
   256  		}
   257  	}
   258  }
   259  
   260  func hasHeaderValue(m http.Header, key, value string) bool {
   261  	var s []string
   262  	var ok bool
   263  
   264  	if s, ok = m[key]; !ok {
   265  		return false
   266  	}
   267  
   268  	for _, v := range s {
   269  		if v == value {
   270  			return true
   271  		}
   272  	}
   273  	return false
   274  }
   275  
   276  func hasHeaderKey(m http.Header, key string) bool {
   277  	_, ok := m[key]
   278  	return ok
   279  }
   280  
   281  type fromRemoteOptions struct {
   282  	Method  string
   283  	Headers map[string]any
   284  	Body    []byte
   285  }
   286  
   287  func (o fromRemoteOptions) BodyReader() io.Reader {
   288  	if o.Body == nil {
   289  		return nil
   290  	}
   291  	return bytes.NewBuffer(o.Body)
   292  }
   293  
   294  func (o fromRemoteOptions) NewRequest(url string) (*http.Request, error) {
   295  	req, err := http.NewRequest(o.Method, url, o.BodyReader())
   296  	if err != nil {
   297  		return nil, err
   298  	}
   299  
   300  	// First add any user provided headers.
   301  	if o.Headers != nil {
   302  		addUserProvidedHeaders(o.Headers, req)
   303  	}
   304  
   305  	// Then add default headers not provided by the user.
   306  	addDefaultHeaders(req)
   307  
   308  	return req, nil
   309  }
   310  
   311  func decodeRemoteOptions(optionsm map[string]any) (fromRemoteOptions, error) {
   312  	options := fromRemoteOptions{
   313  		Method: "GET",
   314  	}
   315  
   316  	err := mapstructure.WeakDecode(optionsm, &options)
   317  	if err != nil {
   318  		return options, err
   319  	}
   320  	options.Method = strings.ToUpper(options.Method)
   321  
   322  	return options, nil
   323  }