github.com/anakojm/hugo-katex@v0.0.0-20231023141351-42d6f5de9c0b/resources/resource_factories/create/remote.go (about)

     1  // Copyright 2021 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package create
    15  
    16  import (
    17  	"bufio"
    18  	"bytes"
    19  	"fmt"
    20  	"io"
    21  	"math/rand"
    22  	"mime"
    23  	"net/http"
    24  	"net/http/httputil"
    25  	"net/url"
    26  	"path"
    27  	"path/filepath"
    28  	"strings"
    29  	"time"
    30  
    31  	"github.com/gohugoio/hugo/common/hugio"
    32  	"github.com/gohugoio/hugo/common/maps"
    33  	"github.com/gohugoio/hugo/common/types"
    34  	"github.com/gohugoio/hugo/identity"
    35  	"github.com/gohugoio/hugo/media"
    36  	"github.com/gohugoio/hugo/resources"
    37  	"github.com/gohugoio/hugo/resources/resource"
    38  	"github.com/mitchellh/mapstructure"
    39  )
    40  
    41  type HTTPError struct {
    42  	error
    43  	Data map[string]any
    44  
    45  	StatusCode int
    46  	Body       string
    47  }
    48  
    49  func responseToData(res *http.Response, readBody bool) map[string]any {
    50  	var body []byte
    51  	if readBody {
    52  		body, _ = io.ReadAll(res.Body)
    53  	}
    54  
    55  	m := map[string]any{
    56  		"StatusCode":       res.StatusCode,
    57  		"Status":           res.Status,
    58  		"TransferEncoding": res.TransferEncoding,
    59  		"ContentLength":    res.ContentLength,
    60  		"ContentType":      res.Header.Get("Content-Type"),
    61  	}
    62  
    63  	if readBody {
    64  		m["Body"] = string(body)
    65  	}
    66  
    67  	return m
    68  
    69  }
    70  
    71  func toHTTPError(err error, res *http.Response, readBody bool) *HTTPError {
    72  	if err == nil {
    73  		panic("err is nil")
    74  	}
    75  	if res == nil {
    76  		return &HTTPError{
    77  			error: err,
    78  			Data:  map[string]any{},
    79  		}
    80  	}
    81  
    82  	return &HTTPError{
    83  		error: err,
    84  		Data:  responseToData(res, readBody),
    85  	}
    86  }
    87  
    88  var temporaryHTTPStatusCodes = map[int]bool{
    89  	408: true,
    90  	429: true,
    91  	500: true,
    92  	502: true,
    93  	503: true,
    94  	504: true,
    95  }
    96  
    97  // FromRemote expects one or n-parts of a URL to a resource
    98  // If you provide multiple parts they will be joined together to the final URL.
    99  func (c *Client) FromRemote(uri string, optionsm map[string]any) (resource.Resource, error) {
   100  	rURL, err := url.Parse(uri)
   101  	if err != nil {
   102  		return nil, fmt.Errorf("failed to parse URL for resource %s: %w", uri, err)
   103  	}
   104  
   105  	method := "GET"
   106  	if s, ok := maps.LookupEqualFold(optionsm, "method"); ok {
   107  		method = strings.ToUpper(s.(string))
   108  	}
   109  	isHeadMethod := method == "HEAD"
   110  
   111  	resourceID := calculateResourceID(uri, optionsm)
   112  
   113  	_, httpResponse, err := c.cacheGetResource.GetOrCreate(resourceID, func() (io.ReadCloser, error) {
   114  		options, err := decodeRemoteOptions(optionsm)
   115  		if err != nil {
   116  			return nil, fmt.Errorf("failed to decode options for resource %s: %w", uri, err)
   117  		}
   118  		if err := c.validateFromRemoteArgs(uri, options); err != nil {
   119  			return nil, err
   120  		}
   121  
   122  		var (
   123  			start          time.Time
   124  			nextSleep      = time.Duration((rand.Intn(1000) + 100)) * time.Millisecond
   125  			nextSleepLimit = time.Duration(5) * time.Second
   126  		)
   127  
   128  		for {
   129  			b, retry, err := func() ([]byte, bool, error) {
   130  				req, err := options.NewRequest(uri)
   131  				if err != nil {
   132  					return nil, false, fmt.Errorf("failed to create request for resource %s: %w", uri, err)
   133  				}
   134  
   135  				res, err := c.httpClient.Do(req)
   136  				if err != nil {
   137  					return nil, false, err
   138  				}
   139  				defer res.Body.Close()
   140  
   141  				if res.StatusCode != http.StatusNotFound {
   142  					if res.StatusCode < 200 || res.StatusCode > 299 {
   143  						return nil, temporaryHTTPStatusCodes[res.StatusCode], toHTTPError(fmt.Errorf("failed to fetch remote resource: %s", http.StatusText(res.StatusCode)), res, !isHeadMethod)
   144  
   145  					}
   146  				}
   147  
   148  				b, err := httputil.DumpResponse(res, true)
   149  				if err != nil {
   150  					return nil, false, toHTTPError(err, res, !isHeadMethod)
   151  				}
   152  
   153  				return b, false, nil
   154  
   155  			}()
   156  
   157  			if err != nil {
   158  				if retry {
   159  					if start.IsZero() {
   160  						start = time.Now()
   161  					} else if d := time.Since(start) + nextSleep; d >= c.rs.Cfg.Timeout() {
   162  						c.rs.Logger.Errorf("Retry timeout (configured to %s) fetching remote resource.", c.rs.Cfg.Timeout())
   163  						return nil, err
   164  					}
   165  					time.Sleep(nextSleep)
   166  					if nextSleep < nextSleepLimit {
   167  						nextSleep *= 2
   168  					}
   169  					continue
   170  				}
   171  				return nil, err
   172  			}
   173  
   174  			return hugio.ToReadCloser(bytes.NewReader(b)), nil
   175  
   176  		}
   177  
   178  	})
   179  	if err != nil {
   180  		return nil, err
   181  	}
   182  	defer httpResponse.Close()
   183  
   184  	res, err := http.ReadResponse(bufio.NewReader(httpResponse), nil)
   185  	if err != nil {
   186  		return nil, err
   187  	}
   188  	defer res.Body.Close()
   189  
   190  	if res.StatusCode == http.StatusNotFound {
   191  		// Not found. This matches how looksup for local resources work.
   192  		return nil, nil
   193  	}
   194  
   195  	var (
   196  		body      []byte
   197  		mediaType media.Type
   198  	)
   199  	// A response to a HEAD method should not have a body. If it has one anyway, that body must be ignored.
   200  	// See https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/HEAD
   201  	if !isHeadMethod && res.Body != nil {
   202  		body, err = io.ReadAll(res.Body)
   203  		if err != nil {
   204  			return nil, fmt.Errorf("failed to read remote resource %q: %w", uri, err)
   205  		}
   206  	}
   207  
   208  	filename := path.Base(rURL.Path)
   209  	if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil {
   210  		if _, ok := params["filename"]; ok {
   211  			filename = params["filename"]
   212  		}
   213  	}
   214  
   215  	contentType := res.Header.Get("Content-Type")
   216  
   217  	// For HEAD requests we have no body to work with, so we need to use the Content-Type header.
   218  	if isHeadMethod || c.rs.ExecHelper.Sec().HTTP.MediaTypes.Accept(contentType) {
   219  		var found bool
   220  		mediaType, found = c.rs.MediaTypes().GetByType(contentType)
   221  		if !found {
   222  			// A media type not configured in Hugo, just create one from the content type string.
   223  			mediaType, _ = media.FromString(contentType)
   224  		}
   225  	}
   226  
   227  	if mediaType.IsZero() {
   228  
   229  		var extensionHints []string
   230  
   231  		// mime.ExtensionsByType gives a long list of extensions for text/plain,
   232  		// just use ".txt".
   233  		if strings.HasPrefix(contentType, "text/plain") {
   234  			extensionHints = []string{".txt"}
   235  		} else {
   236  			exts, _ := mime.ExtensionsByType(contentType)
   237  			if exts != nil {
   238  				extensionHints = exts
   239  			}
   240  		}
   241  
   242  		// Look for a file extension. If it's .txt, look for a more specific.
   243  		if extensionHints == nil || extensionHints[0] == ".txt" {
   244  			if ext := path.Ext(filename); ext != "" {
   245  				extensionHints = []string{ext}
   246  			}
   247  		}
   248  
   249  		// Now resolve the media type primarily using the content.
   250  		mediaType = media.FromContent(c.rs.MediaTypes(), extensionHints, body)
   251  
   252  	}
   253  
   254  	if mediaType.IsZero() {
   255  		return nil, fmt.Errorf("failed to resolve media type for remote resource %q", uri)
   256  	}
   257  
   258  	resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + mediaType.FirstSuffix.FullSuffix
   259  	data := responseToData(res, false)
   260  
   261  	return c.rs.New(
   262  		resources.ResourceSourceDescriptor{
   263  			MediaType:   mediaType,
   264  			Data:        data,
   265  			LazyPublish: true,
   266  			OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) {
   267  				return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil
   268  			},
   269  			RelTargetFilename: filepath.Clean(resourceID),
   270  		})
   271  }
   272  
   273  func (c *Client) validateFromRemoteArgs(uri string, options fromRemoteOptions) error {
   274  	if err := c.rs.ExecHelper.Sec().CheckAllowedHTTPURL(uri); err != nil {
   275  		return err
   276  	}
   277  
   278  	if err := c.rs.ExecHelper.Sec().CheckAllowedHTTPMethod(options.Method); err != nil {
   279  		return err
   280  	}
   281  
   282  	return nil
   283  }
   284  
   285  func calculateResourceID(uri string, optionsm map[string]any) string {
   286  	if key, found := maps.LookupEqualFold(optionsm, "key"); found {
   287  		return identity.HashString(key)
   288  	}
   289  	return identity.HashString(uri, optionsm)
   290  }
   291  
   292  func addDefaultHeaders(req *http.Request) {
   293  	if !hasHeaderKey(req.Header, "User-Agent") {
   294  		req.Header.Add("User-Agent", "Hugo Static Site Generator")
   295  	}
   296  }
   297  
   298  func addUserProvidedHeaders(headers map[string]any, req *http.Request) {
   299  	if headers == nil {
   300  		return
   301  	}
   302  	for key, val := range headers {
   303  		vals := types.ToStringSlicePreserveString(val)
   304  		for _, s := range vals {
   305  			req.Header.Add(key, s)
   306  		}
   307  	}
   308  }
   309  
   310  func hasHeaderValue(m http.Header, key, value string) bool {
   311  	var s []string
   312  	var ok bool
   313  
   314  	if s, ok = m[key]; !ok {
   315  		return false
   316  	}
   317  
   318  	for _, v := range s {
   319  		if v == value {
   320  			return true
   321  		}
   322  	}
   323  	return false
   324  }
   325  
   326  func hasHeaderKey(m http.Header, key string) bool {
   327  	_, ok := m[key]
   328  	return ok
   329  }
   330  
   331  type fromRemoteOptions struct {
   332  	Method  string
   333  	Headers map[string]any
   334  	Body    []byte
   335  }
   336  
   337  func (o fromRemoteOptions) BodyReader() io.Reader {
   338  	if o.Body == nil {
   339  		return nil
   340  	}
   341  	return bytes.NewBuffer(o.Body)
   342  }
   343  
   344  func (o fromRemoteOptions) NewRequest(url string) (*http.Request, error) {
   345  	req, err := http.NewRequest(o.Method, url, o.BodyReader())
   346  	if err != nil {
   347  		return nil, err
   348  	}
   349  
   350  	// First add any user provided headers.
   351  	if o.Headers != nil {
   352  		addUserProvidedHeaders(o.Headers, req)
   353  	}
   354  
   355  	// Then add default headers not provided by the user.
   356  	addDefaultHeaders(req)
   357  
   358  	return req, nil
   359  }
   360  
   361  func decodeRemoteOptions(optionsm map[string]any) (fromRemoteOptions, error) {
   362  	options := fromRemoteOptions{
   363  		Method: "GET",
   364  	}
   365  
   366  	err := mapstructure.WeakDecode(optionsm, &options)
   367  	if err != nil {
   368  		return options, err
   369  	}
   370  	options.Method = strings.ToUpper(options.Method)
   371  
   372  	return options, nil
   373  }