github.com/anakojm/hugo-katex@v0.0.0-20231023141351-42d6f5de9c0b/resources/resource_factories/create/remote.go (about) 1 // Copyright 2021 The Hugo Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package create 15 16 import ( 17 "bufio" 18 "bytes" 19 "fmt" 20 "io" 21 "math/rand" 22 "mime" 23 "net/http" 24 "net/http/httputil" 25 "net/url" 26 "path" 27 "path/filepath" 28 "strings" 29 "time" 30 31 "github.com/gohugoio/hugo/common/hugio" 32 "github.com/gohugoio/hugo/common/maps" 33 "github.com/gohugoio/hugo/common/types" 34 "github.com/gohugoio/hugo/identity" 35 "github.com/gohugoio/hugo/media" 36 "github.com/gohugoio/hugo/resources" 37 "github.com/gohugoio/hugo/resources/resource" 38 "github.com/mitchellh/mapstructure" 39 ) 40 41 type HTTPError struct { 42 error 43 Data map[string]any 44 45 StatusCode int 46 Body string 47 } 48 49 func responseToData(res *http.Response, readBody bool) map[string]any { 50 var body []byte 51 if readBody { 52 body, _ = io.ReadAll(res.Body) 53 } 54 55 m := map[string]any{ 56 "StatusCode": res.StatusCode, 57 "Status": res.Status, 58 "TransferEncoding": res.TransferEncoding, 59 "ContentLength": res.ContentLength, 60 "ContentType": res.Header.Get("Content-Type"), 61 } 62 63 if readBody { 64 m["Body"] = string(body) 65 } 66 67 return m 68 69 } 70 71 func toHTTPError(err error, res *http.Response, readBody bool) *HTTPError { 72 if err == nil { 73 panic("err is nil") 74 } 75 if res == nil { 76 return &HTTPError{ 77 error: err, 78 Data: map[string]any{}, 79 } 80 } 81 82 return &HTTPError{ 83 error: err, 84 Data: responseToData(res, readBody), 85 } 86 } 87 88 var temporaryHTTPStatusCodes = map[int]bool{ 89 408: true, 90 429: true, 91 500: true, 92 502: true, 93 503: true, 94 504: true, 95 } 96 97 // FromRemote expects one or n-parts of a URL to a resource 98 // If you provide multiple parts they will be joined together to the final URL. 99 func (c *Client) FromRemote(uri string, optionsm map[string]any) (resource.Resource, error) { 100 rURL, err := url.Parse(uri) 101 if err != nil { 102 return nil, fmt.Errorf("failed to parse URL for resource %s: %w", uri, err) 103 } 104 105 method := "GET" 106 if s, ok := maps.LookupEqualFold(optionsm, "method"); ok { 107 method = strings.ToUpper(s.(string)) 108 } 109 isHeadMethod := method == "HEAD" 110 111 resourceID := calculateResourceID(uri, optionsm) 112 113 _, httpResponse, err := c.cacheGetResource.GetOrCreate(resourceID, func() (io.ReadCloser, error) { 114 options, err := decodeRemoteOptions(optionsm) 115 if err != nil { 116 return nil, fmt.Errorf("failed to decode options for resource %s: %w", uri, err) 117 } 118 if err := c.validateFromRemoteArgs(uri, options); err != nil { 119 return nil, err 120 } 121 122 var ( 123 start time.Time 124 nextSleep = time.Duration((rand.Intn(1000) + 100)) * time.Millisecond 125 nextSleepLimit = time.Duration(5) * time.Second 126 ) 127 128 for { 129 b, retry, err := func() ([]byte, bool, error) { 130 req, err := options.NewRequest(uri) 131 if err != nil { 132 return nil, false, fmt.Errorf("failed to create request for resource %s: %w", uri, err) 133 } 134 135 res, err := c.httpClient.Do(req) 136 if err != nil { 137 return nil, false, err 138 } 139 defer res.Body.Close() 140 141 if res.StatusCode != http.StatusNotFound { 142 if res.StatusCode < 200 || res.StatusCode > 299 { 143 return nil, temporaryHTTPStatusCodes[res.StatusCode], toHTTPError(fmt.Errorf("failed to fetch remote resource: %s", http.StatusText(res.StatusCode)), res, !isHeadMethod) 144 145 } 146 } 147 148 b, err := httputil.DumpResponse(res, true) 149 if err != nil { 150 return nil, false, toHTTPError(err, res, !isHeadMethod) 151 } 152 153 return b, false, nil 154 155 }() 156 157 if err != nil { 158 if retry { 159 if start.IsZero() { 160 start = time.Now() 161 } else if d := time.Since(start) + nextSleep; d >= c.rs.Cfg.Timeout() { 162 c.rs.Logger.Errorf("Retry timeout (configured to %s) fetching remote resource.", c.rs.Cfg.Timeout()) 163 return nil, err 164 } 165 time.Sleep(nextSleep) 166 if nextSleep < nextSleepLimit { 167 nextSleep *= 2 168 } 169 continue 170 } 171 return nil, err 172 } 173 174 return hugio.ToReadCloser(bytes.NewReader(b)), nil 175 176 } 177 178 }) 179 if err != nil { 180 return nil, err 181 } 182 defer httpResponse.Close() 183 184 res, err := http.ReadResponse(bufio.NewReader(httpResponse), nil) 185 if err != nil { 186 return nil, err 187 } 188 defer res.Body.Close() 189 190 if res.StatusCode == http.StatusNotFound { 191 // Not found. This matches how looksup for local resources work. 192 return nil, nil 193 } 194 195 var ( 196 body []byte 197 mediaType media.Type 198 ) 199 // A response to a HEAD method should not have a body. If it has one anyway, that body must be ignored. 200 // See https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/HEAD 201 if !isHeadMethod && res.Body != nil { 202 body, err = io.ReadAll(res.Body) 203 if err != nil { 204 return nil, fmt.Errorf("failed to read remote resource %q: %w", uri, err) 205 } 206 } 207 208 filename := path.Base(rURL.Path) 209 if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil { 210 if _, ok := params["filename"]; ok { 211 filename = params["filename"] 212 } 213 } 214 215 contentType := res.Header.Get("Content-Type") 216 217 // For HEAD requests we have no body to work with, so we need to use the Content-Type header. 218 if isHeadMethod || c.rs.ExecHelper.Sec().HTTP.MediaTypes.Accept(contentType) { 219 var found bool 220 mediaType, found = c.rs.MediaTypes().GetByType(contentType) 221 if !found { 222 // A media type not configured in Hugo, just create one from the content type string. 223 mediaType, _ = media.FromString(contentType) 224 } 225 } 226 227 if mediaType.IsZero() { 228 229 var extensionHints []string 230 231 // mime.ExtensionsByType gives a long list of extensions for text/plain, 232 // just use ".txt". 233 if strings.HasPrefix(contentType, "text/plain") { 234 extensionHints = []string{".txt"} 235 } else { 236 exts, _ := mime.ExtensionsByType(contentType) 237 if exts != nil { 238 extensionHints = exts 239 } 240 } 241 242 // Look for a file extension. If it's .txt, look for a more specific. 243 if extensionHints == nil || extensionHints[0] == ".txt" { 244 if ext := path.Ext(filename); ext != "" { 245 extensionHints = []string{ext} 246 } 247 } 248 249 // Now resolve the media type primarily using the content. 250 mediaType = media.FromContent(c.rs.MediaTypes(), extensionHints, body) 251 252 } 253 254 if mediaType.IsZero() { 255 return nil, fmt.Errorf("failed to resolve media type for remote resource %q", uri) 256 } 257 258 resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + mediaType.FirstSuffix.FullSuffix 259 data := responseToData(res, false) 260 261 return c.rs.New( 262 resources.ResourceSourceDescriptor{ 263 MediaType: mediaType, 264 Data: data, 265 LazyPublish: true, 266 OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) { 267 return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil 268 }, 269 RelTargetFilename: filepath.Clean(resourceID), 270 }) 271 } 272 273 func (c *Client) validateFromRemoteArgs(uri string, options fromRemoteOptions) error { 274 if err := c.rs.ExecHelper.Sec().CheckAllowedHTTPURL(uri); err != nil { 275 return err 276 } 277 278 if err := c.rs.ExecHelper.Sec().CheckAllowedHTTPMethod(options.Method); err != nil { 279 return err 280 } 281 282 return nil 283 } 284 285 func calculateResourceID(uri string, optionsm map[string]any) string { 286 if key, found := maps.LookupEqualFold(optionsm, "key"); found { 287 return identity.HashString(key) 288 } 289 return identity.HashString(uri, optionsm) 290 } 291 292 func addDefaultHeaders(req *http.Request) { 293 if !hasHeaderKey(req.Header, "User-Agent") { 294 req.Header.Add("User-Agent", "Hugo Static Site Generator") 295 } 296 } 297 298 func addUserProvidedHeaders(headers map[string]any, req *http.Request) { 299 if headers == nil { 300 return 301 } 302 for key, val := range headers { 303 vals := types.ToStringSlicePreserveString(val) 304 for _, s := range vals { 305 req.Header.Add(key, s) 306 } 307 } 308 } 309 310 func hasHeaderValue(m http.Header, key, value string) bool { 311 var s []string 312 var ok bool 313 314 if s, ok = m[key]; !ok { 315 return false 316 } 317 318 for _, v := range s { 319 if v == value { 320 return true 321 } 322 } 323 return false 324 } 325 326 func hasHeaderKey(m http.Header, key string) bool { 327 _, ok := m[key] 328 return ok 329 } 330 331 type fromRemoteOptions struct { 332 Method string 333 Headers map[string]any 334 Body []byte 335 } 336 337 func (o fromRemoteOptions) BodyReader() io.Reader { 338 if o.Body == nil { 339 return nil 340 } 341 return bytes.NewBuffer(o.Body) 342 } 343 344 func (o fromRemoteOptions) NewRequest(url string) (*http.Request, error) { 345 req, err := http.NewRequest(o.Method, url, o.BodyReader()) 346 if err != nil { 347 return nil, err 348 } 349 350 // First add any user provided headers. 351 if o.Headers != nil { 352 addUserProvidedHeaders(o.Headers, req) 353 } 354 355 // Then add default headers not provided by the user. 356 addDefaultHeaders(req) 357 358 return req, nil 359 } 360 361 func decodeRemoteOptions(optionsm map[string]any) (fromRemoteOptions, error) { 362 options := fromRemoteOptions{ 363 Method: "GET", 364 } 365 366 err := mapstructure.WeakDecode(optionsm, &options) 367 if err != nil { 368 return options, err 369 } 370 options.Method = strings.ToUpper(options.Method) 371 372 return options, nil 373 }