github.com/linchen2chris/hugo@v0.0.0-20230307053224-cec209389705/resources/resource_factories/create/remote.go (about) 1 // Copyright 2021 The Hugo Authors. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // http://www.apache.org/licenses/LICENSE-2.0 7 // 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package create 15 16 import ( 17 "bufio" 18 "bytes" 19 "fmt" 20 "io" 21 "mime" 22 "net/http" 23 "net/http/httputil" 24 "net/url" 25 "path" 26 "path/filepath" 27 "strings" 28 29 "github.com/gohugoio/hugo/common/hugio" 30 "github.com/gohugoio/hugo/common/maps" 31 "github.com/gohugoio/hugo/common/types" 32 "github.com/gohugoio/hugo/identity" 33 "github.com/gohugoio/hugo/media" 34 "github.com/gohugoio/hugo/resources" 35 "github.com/gohugoio/hugo/resources/resource" 36 "github.com/mitchellh/mapstructure" 37 ) 38 39 type HTTPError struct { 40 error 41 Data map[string]any 42 43 StatusCode int 44 Body string 45 } 46 47 func responseToData(res *http.Response, readBody bool) map[string]any { 48 var body []byte 49 if readBody { 50 body, _ = io.ReadAll(res.Body) 51 } 52 53 m := map[string]any{ 54 "StatusCode": res.StatusCode, 55 "Status": res.Status, 56 "TransferEncoding": res.TransferEncoding, 57 "ContentLength": res.ContentLength, 58 "ContentType": res.Header.Get("Content-Type"), 59 } 60 61 if readBody { 62 m["Body"] = string(body) 63 } 64 65 return m 66 67 } 68 69 func toHTTPError(err error, res *http.Response, readBody bool) *HTTPError { 70 if err == nil { 71 panic("err is nil") 72 } 73 if res == nil { 74 return &HTTPError{ 75 error: err, 76 Data: map[string]any{}, 77 } 78 } 79 80 return &HTTPError{ 81 error: err, 82 Data: responseToData(res, readBody), 83 } 84 } 85 86 // FromRemote expects one or n-parts of a URL to a resource 87 // If you provide multiple parts they will be joined together to the final URL. 88 func (c *Client) FromRemote(uri string, optionsm map[string]any) (resource.Resource, error) { 89 rURL, err := url.Parse(uri) 90 if err != nil { 91 return nil, fmt.Errorf("failed to parse URL for resource %s: %w", uri, err) 92 } 93 94 method := "GET" 95 if s, ok := maps.LookupEqualFold(optionsm, "method"); ok { 96 method = strings.ToUpper(s.(string)) 97 } 98 isHeadMethod := method == "HEAD" 99 100 resourceID := calculateResourceID(uri, optionsm) 101 102 _, httpResponse, err := c.cacheGetResource.GetOrCreate(resourceID, func() (io.ReadCloser, error) { 103 options, err := decodeRemoteOptions(optionsm) 104 if err != nil { 105 return nil, fmt.Errorf("failed to decode options for resource %s: %w", uri, err) 106 } 107 if err := c.validateFromRemoteArgs(uri, options); err != nil { 108 return nil, err 109 } 110 111 req, err := options.NewRequest(uri) 112 if err != nil { 113 return nil, fmt.Errorf("failed to create request for resource %s: %w", uri, err) 114 } 115 116 res, err := c.httpClient.Do(req) 117 if err != nil { 118 return nil, err 119 } 120 defer res.Body.Close() 121 122 httpResponse, err := httputil.DumpResponse(res, true) 123 if err != nil { 124 return nil, toHTTPError(err, res, !isHeadMethod) 125 } 126 127 if res.StatusCode != http.StatusNotFound { 128 if res.StatusCode < 200 || res.StatusCode > 299 { 129 return nil, toHTTPError(fmt.Errorf("failed to fetch remote resource: %s", http.StatusText(res.StatusCode)), res, !isHeadMethod) 130 131 } 132 } 133 134 return hugio.ToReadCloser(bytes.NewReader(httpResponse)), nil 135 }) 136 if err != nil { 137 return nil, err 138 } 139 defer httpResponse.Close() 140 141 res, err := http.ReadResponse(bufio.NewReader(httpResponse), nil) 142 if err != nil { 143 return nil, err 144 } 145 defer res.Body.Close() 146 147 if res.StatusCode == http.StatusNotFound { 148 // Not found. This matches how looksup for local resources work. 149 return nil, nil 150 } 151 152 var ( 153 body []byte 154 mediaType media.Type 155 ) 156 // A response to a HEAD method should not have a body. If it has one anyway, that body must be ignored. 157 // See https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/HEAD 158 if !isHeadMethod && res.Body != nil { 159 body, err = io.ReadAll(res.Body) 160 if err != nil { 161 return nil, fmt.Errorf("failed to read remote resource %q: %w", uri, err) 162 } 163 } 164 165 filename := path.Base(rURL.Path) 166 if _, params, _ := mime.ParseMediaType(res.Header.Get("Content-Disposition")); params != nil { 167 if _, ok := params["filename"]; ok { 168 filename = params["filename"] 169 } 170 } 171 172 contentType := res.Header.Get("Content-Type") 173 174 if isHeadMethod { 175 // We have no body to work with, so we need to use the Content-Type header. 176 mediaType, _ = media.FromString(contentType) 177 } else { 178 179 var extensionHints []string 180 181 // mime.ExtensionsByType gives a long list of extensions for text/plain, 182 // just use ".txt". 183 if strings.HasPrefix(contentType, "text/plain") { 184 extensionHints = []string{".txt"} 185 } else { 186 exts, _ := mime.ExtensionsByType(contentType) 187 if exts != nil { 188 extensionHints = exts 189 } 190 } 191 192 // Look for a file extension. If it's .txt, look for a more specific. 193 if extensionHints == nil || extensionHints[0] == ".txt" { 194 if ext := path.Ext(filename); ext != "" { 195 extensionHints = []string{ext} 196 } 197 } 198 199 // Now resolve the media type primarily using the content. 200 mediaType = media.FromContent(c.rs.MediaTypes, extensionHints, body) 201 202 } 203 204 if mediaType.IsZero() { 205 return nil, fmt.Errorf("failed to resolve media type for remote resource %q", uri) 206 } 207 208 resourceID = filename[:len(filename)-len(path.Ext(filename))] + "_" + resourceID + mediaType.FirstSuffix.FullSuffix 209 data := responseToData(res, false) 210 211 return c.rs.New( 212 resources.ResourceSourceDescriptor{ 213 MediaType: mediaType, 214 Data: data, 215 LazyPublish: true, 216 OpenReadSeekCloser: func() (hugio.ReadSeekCloser, error) { 217 return hugio.NewReadSeekerNoOpCloser(bytes.NewReader(body)), nil 218 }, 219 RelTargetFilename: filepath.Clean(resourceID), 220 }) 221 } 222 223 func (c *Client) validateFromRemoteArgs(uri string, options fromRemoteOptions) error { 224 if err := c.rs.ExecHelper.Sec().CheckAllowedHTTPURL(uri); err != nil { 225 return err 226 } 227 228 if err := c.rs.ExecHelper.Sec().CheckAllowedHTTPMethod(options.Method); err != nil { 229 return err 230 } 231 232 return nil 233 } 234 235 func calculateResourceID(uri string, optionsm map[string]any) string { 236 if key, found := maps.LookupEqualFold(optionsm, "key"); found { 237 return identity.HashString(key) 238 } 239 return identity.HashString(uri, optionsm) 240 } 241 242 func addDefaultHeaders(req *http.Request) { 243 if !hasHeaderKey(req.Header, "User-Agent") { 244 req.Header.Add("User-Agent", "Hugo Static Site Generator") 245 } 246 } 247 248 func addUserProvidedHeaders(headers map[string]any, req *http.Request) { 249 if headers == nil { 250 return 251 } 252 for key, val := range headers { 253 vals := types.ToStringSlicePreserveString(val) 254 for _, s := range vals { 255 req.Header.Add(key, s) 256 } 257 } 258 } 259 260 func hasHeaderValue(m http.Header, key, value string) bool { 261 var s []string 262 var ok bool 263 264 if s, ok = m[key]; !ok { 265 return false 266 } 267 268 for _, v := range s { 269 if v == value { 270 return true 271 } 272 } 273 return false 274 } 275 276 func hasHeaderKey(m http.Header, key string) bool { 277 _, ok := m[key] 278 return ok 279 } 280 281 type fromRemoteOptions struct { 282 Method string 283 Headers map[string]any 284 Body []byte 285 } 286 287 func (o fromRemoteOptions) BodyReader() io.Reader { 288 if o.Body == nil { 289 return nil 290 } 291 return bytes.NewBuffer(o.Body) 292 } 293 294 func (o fromRemoteOptions) NewRequest(url string) (*http.Request, error) { 295 req, err := http.NewRequest(o.Method, url, o.BodyReader()) 296 if err != nil { 297 return nil, err 298 } 299 300 // First add any user provided headers. 301 if o.Headers != nil { 302 addUserProvidedHeaders(o.Headers, req) 303 } 304 305 // Then add default headers not provided by the user. 306 addDefaultHeaders(req) 307 308 return req, nil 309 } 310 311 func decodeRemoteOptions(optionsm map[string]any) (fromRemoteOptions, error) { 312 options := fromRemoteOptions{ 313 Method: "GET", 314 } 315 316 err := mapstructure.WeakDecode(optionsm, &options) 317 if err != nil { 318 return options, err 319 } 320 options.Method = strings.ToUpper(options.Method) 321 322 return options, nil 323 }