github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/fetch/unified_requester.go (about) 1 // Package fetch performs a http request and returns the byte slice, 2 // also operating on google app engine. 3 package fetch 4 5 import ( 6 "fmt" 7 "io/ioutil" 8 "log" 9 "net/http" 10 "net/url" 11 "strings" 12 "time" 13 14 "github.com/pbberlin/tools/appengine/util_appengine" 15 "github.com/pbberlin/tools/stringspb" 16 "google.golang.org/appengine" 17 18 oldAE "appengine" 19 oldFetch "appengine/urlfetch" 20 ) 21 22 var MsgNoRdirects = "redirect cancelled" 23 var ErrCancelRedirects = fmt.Errorf(MsgNoRdirects) 24 var ErrNoContext = fmt.Errorf("gaeReq did not yield a context; deadline exceeded?") 25 26 type Options struct { 27 Req *http.Request 28 29 URL string 30 31 RedirectHandling int // 1 => call off upon redirects 32 33 LogLevel int 34 35 KnownProtocol string 36 ForceHTTPSEvenOnDevelopmentServer bool 37 } 38 39 // Response info 40 type Info struct { 41 URL *url.URL 42 Mod time.Time 43 Status int 44 Msg string 45 } 46 47 // UrlGetter universal http getter for app engine and standalone go programs. 48 // Previously response was returned. Forgot why. Dropped it. 49 func UrlGetter(gaeReq *http.Request, options Options) ( 50 []byte, Info, error, 51 ) { 52 53 options.LogLevel = 2 54 55 var err error 56 var inf Info = Info{} 57 58 if options.LogLevel > 0 { 59 if options.Req != nil { 60 inf.Msg += fmt.Sprintf("orig req url: %#v\n", options.Req.URL.String()) 61 } else { 62 inf.Msg += fmt.Sprintf("orig str url: %#v\n", options.URL) 63 } 64 } 65 66 // 67 // Either take provided request 68 // Or build one from options.URL 69 if options.Req == nil { 70 ourl, err := URLFromString(options.URL) // Normalize 71 if err != nil { 72 return nil, inf, err 73 } 74 options.URL = ourl.String() 75 options.Req, err = http.NewRequest("GET", options.URL, nil) 76 if err != nil { 77 return nil, inf, err 78 } 79 } else { 80 if options.Req.URL.Scheme == "" { 81 options.Req.URL.Scheme = "https" 82 } 83 } 84 r := options.Req 85 86 if len(options.KnownProtocol) > 1 { 87 if strings.HasSuffix(options.KnownProtocol, ":") { 88 options.KnownProtocol = strings.TrimSuffix(options.KnownProtocol, ":") 89 } 90 if options.KnownProtocol == "http" || options.KnownProtocol == "https" { 91 r.URL.Scheme = options.KnownProtocol 92 inf.Msg += fmt.Sprintf("Using known protocol %q\n", options.KnownProtocol) 93 } 94 } 95 96 // 97 // Unifiy appengine plain http.client 98 client := &http.Client{} 99 if gaeReq == nil { 100 client.Timeout = time.Duration(5 * time.Second) // GAE does not allow 101 } else { 102 c := util_appengine.SafelyExtractGaeContext(gaeReq) 103 if c != nil { 104 105 ctxOld := oldAE.NewContext(gaeReq) 106 client = oldFetch.Client(ctxOld) 107 108 // this does not prevent urlfetch: SSL_CERTIFICATE_ERROR 109 // it merely leads to err = "DEADLINE_EXCEEDED" 110 tr := oldFetch.Transport{Context: ctxOld, AllowInvalidServerCertificate: true} 111 // thus 112 tr = oldFetch.Transport{Context: ctxOld, AllowInvalidServerCertificate: false} 113 114 tr.Deadline = 20 * time.Second // only possible on aeOld 115 116 client.Transport = &tr 117 // client.Timeout = 20 * time.Second // also not in google.golang.org/appengine/urlfetch 118 119 } else { 120 return nil, inf, ErrNoContext 121 } 122 123 // appengine dev server => always fallback to http 124 if c != nil && appengine.IsDevAppServer() && !options.ForceHTTPSEvenOnDevelopmentServer { 125 r.URL.Scheme = "http" 126 } 127 } 128 129 inf.URL = r.URL 130 131 if options.RedirectHandling == 1 { 132 client.CheckRedirect = func(req *http.Request, via []*http.Request) error { 133 134 if len(via) == 1 && req.URL.Path == via[0].URL.Path+"/" { 135 // allow redirect from /gesundheit to /gesundheit/ 136 return nil 137 } 138 139 spath := "\n" 140 for _, v := range via { 141 spath += v.URL.Path + "\n" 142 } 143 spath += req.URL.Path + "\n" 144 return fmt.Errorf("%v %v", MsgNoRdirects, spath) 145 } 146 } 147 148 if options.LogLevel > 0 { 149 inf.Msg += fmt.Sprintf("url standardized to %q %q %q \n", r.URL.Scheme, r.URL.Host, r.URL.RequestURI()) 150 } 151 152 // 153 // 154 // Respond to test.economist.com directly from memory 155 if _, ok := TestData[r.URL.Host+r.URL.Path]; ok { 156 return TestData[r.URL.Host+r.URL.Path], inf, nil 157 } 158 159 // The actual call 160 // ============================= 161 162 resp, err := client.Do(r) 163 164 // Swallow redirect errors 165 if err != nil { 166 if options.RedirectHandling == 1 { 167 serr := err.Error() 168 if strings.Contains(serr, MsgNoRdirects) { 169 bts := []byte(serr) 170 inf.Mod = time.Now().Add(-10 * time.Minute) 171 return bts, inf, nil 172 } 173 } 174 } 175 176 isHTTPSProblem := false 177 if err != nil { 178 isHTTPSProblem = strings.Contains(err.Error(), "SSL_CERTIFICATE_ERROR") || 179 strings.Contains(err.Error(), "tls: oversized record received with length") 180 } 181 182 // Under narrow conditions => fallback to http 183 if err != nil { 184 if isHTTPSProblem && r.URL.Scheme == "https" && r.Method == "GET" { 185 r.URL.Scheme = "http" 186 var err2nd error 187 resp, err2nd = client.Do(r) 188 // while protocol http may go through 189 // next obstacle might be - again - a redirect error: 190 if err2nd != nil { 191 if options.RedirectHandling == 1 { 192 serr := err2nd.Error() 193 if strings.Contains(serr, MsgNoRdirects) { 194 bts := []byte(serr) 195 inf.Mod = time.Now().Add(-10 * time.Minute) 196 addFallBackSuccessInfo(options, &inf, r, err) 197 return bts, inf, nil 198 } 199 } 200 201 return nil, inf, fmt.Errorf("GET fallback to http failed with %v", err2nd) 202 } 203 addFallBackSuccessInfo(options, &inf, r, err) 204 err = nil // CLEAR error 205 } 206 } 207 208 // 209 // Final error handler 210 // 211 if err != nil { 212 hintAE := "" 213 if isHTTPSProblem && r.URL.Scheme == "https" { 214 // Not GET but POST: 215 // We cannot do a fallback for a post request - the r.Body.Reader is consumed 216 // options.r.URL.Scheme = "http" 217 // resp, err = client.Do(options.Req) 218 return nil, inf, fmt.Errorf("Cannot do https requests. Possible reason: Dev server: %v", err) 219 } else if strings.Contains( 220 err.Error(), 221 "net/http: Client Transport of type init.failingTransport doesn't support CancelRequest; Timeout not supported", 222 ) { 223 hintAE = "\nDid you forget to submit the AE Request?\n" 224 } 225 return nil, inf, fmt.Errorf("request failed: %v - %v", err, hintAE) 226 } 227 228 // 229 // We got response, but 230 // explicit bad response from server 231 if resp.StatusCode != http.StatusOK { 232 233 if resp.StatusCode == http.StatusBadRequest || // 400 234 resp.StatusCode == http.StatusNotFound || // 404 235 false { 236 dmp := "" 237 for k, v := range resp.Header { 238 dmp += fmt.Sprintf("key: %v - val %v\n", k, v) 239 } 240 dmp = "" 241 dmp += stringspb.IndentedDump(r.URL) 242 243 bts, errRd := ioutil.ReadAll(resp.Body) 244 if errRd != nil { 245 return nil, inf, fmt.Errorf("cannot read resp body: %v", errRd) 246 } 247 if len(bts) > 2*1024 { 248 btsApdx := append([]byte(" ...omitted... "), bts[len(bts)-100:]...) 249 bts = append(bts[2*1024:], btsApdx...) 250 } 251 defer resp.Body.Close() 252 253 err2 := fmt.Errorf("resp %v: %v \n%v \n<pre>%s</pre>", resp.StatusCode, r.URL.String(), dmp, bts) 254 255 if r.URL.Path == "" { 256 r.URL.Path = "/" 257 } 258 var err2nd error 259 resp, err2nd = client.Do(r) 260 if err2nd != nil { 261 return nil, inf, fmt.Errorf("again error %v \n%v", err2nd, err2) 262 } 263 if resp.StatusCode != http.StatusOK { 264 inf.Status = resp.StatusCode 265 return nil, inf, fmt.Errorf("again Status NotOK %v \n%v", resp.StatusCode, err2) 266 } 267 log.Printf("successful retry with '/' to %v after %v\n", r.URL.String(), err) 268 err = nil // CLEAR error 269 270 // return nil, inf, err2 271 272 } else { 273 return nil, inf, fmt.Errorf("bad http resp code: %v - %v", resp.StatusCode, r.URL.String()) 274 } 275 } 276 277 bts, err := ioutil.ReadAll(resp.Body) 278 if err != nil { 279 return nil, inf, fmt.Errorf("cannot read resp body: %v", err) 280 } 281 defer resp.Body.Close() 282 283 // time stamp 284 var tlm time.Time // time last modified 285 lm := resp.Header.Get("Last-Modified") 286 if lm != "" { 287 tlm, err = time.Parse(time.RFC1123, lm) // Last-Modified: Sat, 29 Aug 2015 21:15:39 GMT 288 if err != nil { 289 tlm, err = time.Parse(time.RFC1123Z, lm) // with numeric time zone 290 if err != nil { 291 var zeroTime time.Time 292 tlm = zeroTime 293 } 294 } 295 } 296 inf.Mod = tlm 297 // log.Printf(" hdr %v %v\n", lm, tlm.Format(time.ANSIC)) 298 299 return bts, inf, nil 300 301 } 302 303 func addFallBackSuccessInfo(options Options, inf *Info, r *http.Request, err error) { 304 if options.LogLevel > 0 { 305 inf.Msg += fmt.Sprintf("\tsuccessful fallback to http %v", r.URL.String()) 306 inf.Msg += fmt.Sprintf("\tafter %v\n", err) 307 } 308 309 }