github.com/neohugo/neohugo@v0.123.8/resources/resource_factories/create/remote.go (about) 1 // Copyright 2021 The Hugo Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package create 15 16 import ( 17 "bufio" 18 "bytes" 19 "fmt" 20 "io" 21 "math/rand" 22 "mime" 23 "net/http" 24 "net/http/httputil" 25 "net/url" 26 "path" 27 "strings" 28 "time" 29 30 "github.com/neohugo/neohugo/common/hugio" 31 "github.com/neohugo/neohugo/common/maps" 32 "github.com/neohugo/neohugo/common/types" 33 "github.com/neohugo/neohugo/identity" 34 "github.com/neohugo/neohugo/media" 35 "github.com/neohugo/neohugo/resources" 36 "github.com/neohugo/neohugo/resources/resource" 37 38 "github.com/mitchellh/mapstructure" 39 ) 40 41 type HTTPError struct { 42 error 43 Data map[string]any 44 45 StatusCode int 46 Body string 47 } 48 49 func responseToData(res *http.Response, readBody bool) map[string]any { 50 var body []byte 51 if readBody { 52 body, _ = io.ReadAll(res.Body) 53 } 54 55 m := map[string]any{ 56 "StatusCode": res.StatusCode, 57 "Status": res.Status, 58 "TransferEncoding": res.TransferEncoding, 59 "ContentLength": res.ContentLength, 60 "ContentType": res.Header.Get("Content-Type"), 61 } 62 63 if readBody { 64 m["Body"] = string(body) 65 } 66 67 return m 68 } 69 70 func toHTTPError(err error, res *http.Response, readBody bool) *HTTPError { 71 if err == nil { 72 panic("err is nil") 73 } 74 if res == nil { 75 return &HTTPError{ 76 error: err, 77 Data: map[string]any{}, 78 } 79 } 80 81 return &HTTPError{ 82 error: err, 83 Data: responseToData(res, readBody), 84 } 85 } 86 87 var temporaryHTTPStatusCodes = map[int]bool{ 88 408: true, 89 429: true, 90 500: true, 91 502: true, 92 503: true, 93 504: true, 94 } 95 96 // FromRemote expects one or n-parts of a URL to a resource 97 // If you provide multiple parts they will be joined together to the final URL. 98 func (c *Client) FromRemote(uri string, optionsm map[string]any) (resource.Resource, error) { 99 rURL, err := url.Parse(uri) 100 if err != nil { 101 return nil, fmt.Errorf("failed to parse URL for resource %s: %w", uri, err) 102 } 103 104 method := "GET" 105 if s, ok := maps.LookupEqualFold(optionsm, "method"); ok { 106 method = strings.ToUpper(s.(string)) 107 } 108 isHeadMethod := method == "HEAD" 109 110 resourceID := calculateResourceID(uri, optionsm) 111 112 _, httpResponse, err := c.cacheGetResource.GetOrCreate(resourceID, func() (io.ReadCloser, error) { 113 options, err := decodeRemoteOptions(optionsm) 114 if err != nil { 115 return nil, fmt.Errorf("failed to decode options for resource %s: %w", uri, err) 116 } 117 if err := c.validateFromRemoteArgs(uri, options); err != nil { 118 return nil, err 119 } 120 121 var ( 122 start time.Time 123 nextSleep = time.Duration((rand.Intn(1000) + 100)) * time.Millisecond 124 nextSleepLimit = time.Duration(5) * time.Second 125 ) 126 127 for { 128 b, retry, err := func() ([]byte, bool, error) { 129 req, err := options.NewRequest(uri) 130 if err != nil { 131 return nil, false, fmt.Errorf("failed to create request for resource %s: %w", uri, err) 132 } 133 134 res, err := c.httpClient.Do(req) 135 if err != nil { 136 return nil, false, err 137 } 138 defer res.Body.Close() 139 140 if res.StatusCode != http.StatusNotFound { 141 if res.StatusCode < 200 || res.StatusCode > 299 { 142 return nil, temporaryHTTPStatusCodes[res.StatusCode], toHTTPError(fmt.Errorf("failed to fetch remote resource: %s", http.StatusText(res.StatusCode)), res, !isHeadMethod) 143 } 144 } 145 146 b, err := httputil.DumpResponse(res, true) 147 if err != nil { 148 return nil, false, toHTTPError(err, res, !isHeadMethod) 149 } 150 151 return b, false, nil 152 }() 153 if err != nil { 154 if retry { 155 if start.IsZero() { 156 start = time.Now() 157 } else if d := time.Since(start) + nextSleep; d >= c.rs.Cfg.Timeout() { 158 c.rs.Logger.Errorf("Retry timeout (configured to %s) fetching remote resource.", c.rs.Cfg.Timeout()) 159 return nil, err 160 } 161 time.Sleep(nextSleep) 162 if nextSleep < nextSleepLimit { 163 nextSleep *= 2 164 } 165 continue 166 } 167 return nil, err 168 } 169 170 return hugio.ToReadCloser(bytes.NewReader(b)), nil 171 172 } 173 }) 174 if err != nil { 175 return nil, err 176 } 177 defer httpResponse.Close() 178 179 res, err := http.ReadResponse(bufio.NewReader(httpResponse), nil) 180 if err != nil { 181 return nil, err 182 } 183 defer res.Body.Close() 184 185 if res.StatusCode == http.StatusNotFound { 186 // Not found. This matches how looksup for local resources work. 187 return nil, nil 188 } 189 190 var ( 191 body []byte 192 mediaType media.Type 193 ) 194 // A response to a HEAD method should not have a body. If it has one anyway, that body must be ignored. 195 // See https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/HEAD 196 if !isHeadMethod && res.Body != nil { 197 body, err = io.ReadAll(res.Body) 198 if err != nil { 199 return nil, fmt.Errorf("failed to read remote resource %q: %w", uri, err) 200 } 201 } 202 203 filename := path.Base(rURL.Path) 204 if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil { 205 if _, ok := params["filename"]; ok { 206 filename = params["filename"] 207 } 208 } 209 210 contentType := res.Header.Get("Content-Type") 211 212 // For HEAD requests we have no body to work with, so we need to use the Content-Type header. 213 if isHeadMethod || c.rs.ExecHelper.Sec().HTTP.MediaTypes.Accept(contentType) { 214 var found bool 215 mediaType, found = c.rs.MediaTypes().GetByType(contentType) 216 if !found { 217 // A media type not configured in Hugo, just create one from the content type string. 218 mediaType, _ = media.FromString(contentType) 219 } 220 } 221 222 if mediaType.IsZero() { 223 224 var extensionHints []string 225 226 // mime.ExtensionsByType gives a long list of extensions for text/plain, 227 // just use ".txt". 228 if strings.HasPrefix(contentType, "text/plain") { 229 extensionHints = []string{".txt"} 230 } else { 231 exts, _ := mime.ExtensionsByType(contentType) 232 if exts != nil { 233 extensionHints = exts 234 } 235 } 236 237 // Look for a file extension. If it's .txt, look for a more specific. 238 if extensionHints == nil || extensionHints[0] == ".txt" { 239 if ext := path.Ext(filename); ext != "" { 240 extensionHints = []string{ext} 241 } 242 } 243 244 // Now resolve the media type primarily using the content. 245 mediaType = media.FromContent(c.rs.MediaTypes(), extensionHints, body) 246 247 } 248 249 if mediaType.IsZero() { 250 return nil, fmt.Errorf("failed to resolve media type for remote resource %q", uri) 251 } 252 253 resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + mediaType.FirstSuffix.FullSuffix 254 data := responseToData(res, false) 255 256 return c.rs.NewResource( 257 resources.ResourceSourceDescriptor{ 258 MediaType: mediaType, 259 Data: data, 260 GroupIdentity: identity.StringIdentity(resourceID), 261 LazyPublish: true, 262 OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) { 263 return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil 264 }, 265 TargetPath: resourceID, 266 }) 267 } 268 269 func (c *Client) validateFromRemoteArgs(uri string, options fromRemoteOptions) error { 270 if err := c.rs.ExecHelper.Sec().CheckAllowedHTTPURL(uri); err != nil { 271 return err 272 } 273 274 if err := c.rs.ExecHelper.Sec().CheckAllowedHTTPMethod(options.Method); err != nil { 275 return err 276 } 277 278 return nil 279 } 280 281 func calculateResourceID(uri string, optionsm map[string]any) string { 282 if key, found := maps.LookupEqualFold(optionsm, "key"); found { 283 return identity.HashString(key) 284 } 285 return identity.HashString(uri, optionsm) 286 } 287 288 func addDefaultHeaders(req *http.Request) { 289 if !hasHeaderKey(req.Header, "User-Agent") { 290 req.Header.Add("User-Agent", "Hugo Static Site Generator") 291 } 292 } 293 294 func addUserProvidedHeaders(headers map[string]any, req *http.Request) { 295 if headers == nil { 296 return 297 } 298 for key, val := range headers { 299 vals := types.ToStringSlicePreserveString(val) 300 for _, s := range vals { 301 req.Header.Add(key, s) 302 } 303 } 304 } 305 306 func hasHeaderKey(m http.Header, key string) bool { 307 _, ok := m[key] 308 return ok 309 } 310 311 type fromRemoteOptions struct { 312 Method string 313 Headers map[string]any 314 Body []byte 315 } 316 317 func (o fromRemoteOptions) BodyReader() io.Reader { 318 if o.Body == nil { 319 return nil 320 } 321 return bytes.NewBuffer(o.Body) 322 } 323 324 func (o fromRemoteOptions) NewRequest(url string) (*http.Request, error) { 325 req, err := http.NewRequest(o.Method, url, o.BodyReader()) 326 if err != nil { 327 return nil, err 328 } 329 330 // First add any user provided headers. 331 if o.Headers != nil { 332 addUserProvidedHeaders(o.Headers, req) 333 } 334 335 // Then add default headers not provided by the user. 336 addDefaultHeaders(req) 337 338 return req, nil 339 } 340 341 func decodeRemoteOptions(optionsm map[string]any) (fromRemoteOptions, error) { 342 options := fromRemoteOptions{ 343 Method: "GET", 344 } 345 346 err := mapstructure.WeakDecode(optionsm, &options) 347 if err != nil { 348 return options, err 349 } 350 options.Method = strings.ToUpper(options.Method) 351 352 return options, nil 353 }