github.com/aarzilli/tools@v0.0.0-20151123112009-0d27094f75e0/net/http/fetch/unified_requester.go (about)

     1  // Package fetch performs an HTTP request and returns the response body
     2  // as a byte slice; it also works on Google App Engine.
     3  package fetch
     4  
     5  import (
     6  	"fmt"
     7  	"io/ioutil"
     8  	"log"
     9  	"net/http"
    10  	"net/url"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/pbberlin/tools/appengine/util_appengine"
    15  	"github.com/pbberlin/tools/stringspb"
    16  	"google.golang.org/appengine"
    17  
    18  	oldAE "appengine"
    19  	oldFetch "appengine/urlfetch"
    20  )
    21  
    22  var MsgNoRdirects = "redirect cancelled"
    23  var ErrCancelRedirects = fmt.Errorf(MsgNoRdirects)
    24  var ErrNoContext = fmt.Errorf("gaeReq did not yield a context; deadline exceeded?")
    25  
    26  type Options struct {
    27  	Req *http.Request
    28  
    29  	URL string
    30  
    31  	RedirectHandling int // 1 => call off upon redirects
    32  
    33  	LogLevel int
    34  
    35  	KnownProtocol                     string
    36  	ForceHTTPSEvenOnDevelopmentServer bool
    37  }
    38  
    39  // Response info
    40  type Info struct {
    41  	URL    *url.URL
    42  	Mod    time.Time
    43  	Status int
    44  	Msg    string
    45  }
    46  
    47  // UrlGetter universal http getter for app engine and standalone go programs.
    48  // Previously response was returned. Forgot why. Dropped it.
    49  func UrlGetter(gaeReq *http.Request, options Options) (
    50  	[]byte, Info, error,
    51  ) {
    52  
    53  	options.LogLevel = 2
    54  
    55  	var err error
    56  	var inf Info = Info{}
    57  
    58  	if options.LogLevel > 0 {
    59  		if options.Req != nil {
    60  			inf.Msg += fmt.Sprintf("orig req url: %#v\n", options.Req.URL.String())
    61  		} else {
    62  			inf.Msg += fmt.Sprintf("orig str url: %#v\n", options.URL)
    63  		}
    64  	}
    65  
    66  	//
    67  	// Either take provided request
    68  	// Or build one from options.URL
    69  	if options.Req == nil {
    70  		ourl, err := URLFromString(options.URL) // Normalize
    71  		if err != nil {
    72  			return nil, inf, err
    73  		}
    74  		options.URL = ourl.String()
    75  		options.Req, err = http.NewRequest("GET", options.URL, nil)
    76  		if err != nil {
    77  			return nil, inf, err
    78  		}
    79  	} else {
    80  		if options.Req.URL.Scheme == "" {
    81  			options.Req.URL.Scheme = "https"
    82  		}
    83  	}
    84  	r := options.Req
    85  
    86  	if len(options.KnownProtocol) > 1 {
    87  		if strings.HasSuffix(options.KnownProtocol, ":") {
    88  			options.KnownProtocol = strings.TrimSuffix(options.KnownProtocol, ":")
    89  		}
    90  		if options.KnownProtocol == "http" || options.KnownProtocol == "https" {
    91  			r.URL.Scheme = options.KnownProtocol
    92  			inf.Msg += fmt.Sprintf("Using known protocol %q\n", options.KnownProtocol)
    93  		}
    94  	}
    95  
    96  	//
    97  	// Unifiy appengine plain http.client
    98  	client := &http.Client{}
    99  	if gaeReq == nil {
   100  		client.Timeout = time.Duration(5 * time.Second) // GAE does not allow
   101  	} else {
   102  		c := util_appengine.SafelyExtractGaeContext(gaeReq)
   103  		if c != nil {
   104  
   105  			ctxOld := oldAE.NewContext(gaeReq)
   106  			client = oldFetch.Client(ctxOld)
   107  
   108  			// this does not prevent urlfetch: SSL_CERTIFICATE_ERROR
   109  			// it merely leads to err = "DEADLINE_EXCEEDED"
   110  			tr := oldFetch.Transport{Context: ctxOld, AllowInvalidServerCertificate: true}
   111  			// thus
   112  			tr = oldFetch.Transport{Context: ctxOld, AllowInvalidServerCertificate: false}
   113  
   114  			tr.Deadline = 20 * time.Second // only possible on aeOld
   115  
   116  			client.Transport = &tr
   117  			// client.Timeout = 20 * time.Second // also not in google.golang.org/appengine/urlfetch
   118  
   119  		} else {
   120  			return nil, inf, ErrNoContext
   121  		}
   122  
   123  		// appengine dev server => always fallback to http
   124  		if c != nil && appengine.IsDevAppServer() && !options.ForceHTTPSEvenOnDevelopmentServer {
   125  			r.URL.Scheme = "http"
   126  		}
   127  	}
   128  
   129  	inf.URL = r.URL
   130  
   131  	if options.RedirectHandling == 1 {
   132  		client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
   133  
   134  			if len(via) == 1 && req.URL.Path == via[0].URL.Path+"/" {
   135  				// allow redirect from /gesundheit to /gesundheit/
   136  				return nil
   137  			}
   138  
   139  			spath := "\n"
   140  			for _, v := range via {
   141  				spath += v.URL.Path + "\n"
   142  			}
   143  			spath += req.URL.Path + "\n"
   144  			return fmt.Errorf("%v %v", MsgNoRdirects, spath)
   145  		}
   146  	}
   147  
   148  	if options.LogLevel > 0 {
   149  		inf.Msg += fmt.Sprintf("url standardized to %q  %q %q \n", r.URL.Scheme, r.URL.Host, r.URL.RequestURI())
   150  	}
   151  
   152  	//
   153  	//
   154  	// Respond to test.economist.com directly from memory
   155  	if _, ok := TestData[r.URL.Host+r.URL.Path]; ok {
   156  		return TestData[r.URL.Host+r.URL.Path], inf, nil
   157  	}
   158  
   159  	// The actual call
   160  	// =============================
   161  
   162  	resp, err := client.Do(r)
   163  
   164  	// Swallow redirect errors
   165  	if err != nil {
   166  		if options.RedirectHandling == 1 {
   167  			serr := err.Error()
   168  			if strings.Contains(serr, MsgNoRdirects) {
   169  				bts := []byte(serr)
   170  				inf.Mod = time.Now().Add(-10 * time.Minute)
   171  				return bts, inf, nil
   172  			}
   173  		}
   174  	}
   175  
   176  	isHTTPSProblem := false
   177  	if err != nil {
   178  		isHTTPSProblem = strings.Contains(err.Error(), "SSL_CERTIFICATE_ERROR") ||
   179  			strings.Contains(err.Error(), "tls: oversized record received with length")
   180  	}
   181  
   182  	// Under narrow conditions => fallback to http
   183  	if err != nil {
   184  		if isHTTPSProblem && r.URL.Scheme == "https" && r.Method == "GET" {
   185  			r.URL.Scheme = "http"
   186  			var err2nd error
   187  			resp, err2nd = client.Do(r)
   188  			// while protocol http may go through
   189  			// next obstacle might be - again - a redirect error:
   190  			if err2nd != nil {
   191  				if options.RedirectHandling == 1 {
   192  					serr := err2nd.Error()
   193  					if strings.Contains(serr, MsgNoRdirects) {
   194  						bts := []byte(serr)
   195  						inf.Mod = time.Now().Add(-10 * time.Minute)
   196  						addFallBackSuccessInfo(options, &inf, r, err)
   197  						return bts, inf, nil
   198  					}
   199  				}
   200  
   201  				return nil, inf, fmt.Errorf("GET fallback to http failed with %v", err2nd)
   202  			}
   203  			addFallBackSuccessInfo(options, &inf, r, err)
   204  			err = nil // CLEAR error
   205  		}
   206  	}
   207  
   208  	//
   209  	// Final error handler
   210  	//
   211  	if err != nil {
   212  		hintAE := ""
   213  		if isHTTPSProblem && r.URL.Scheme == "https" {
   214  			// Not GET but POST:
   215  			// We cannot do a fallback for a post request - the r.Body.Reader is consumed
   216  			// options.r.URL.Scheme = "http"
   217  			// resp, err = client.Do(options.Req)
   218  			return nil, inf, fmt.Errorf("Cannot do https requests. Possible reason: Dev server: %v", err)
   219  		} else if strings.Contains(
   220  			err.Error(),
   221  			"net/http: Client Transport of type init.failingTransport doesn't support CancelRequest; Timeout not supported",
   222  		) {
   223  			hintAE = "\nDid you forget to submit the AE Request?\n"
   224  		}
   225  		return nil, inf, fmt.Errorf("request failed: %v - %v", err, hintAE)
   226  	}
   227  
   228  	//
   229  	// We got response, but
   230  	// explicit bad response from server
   231  	if resp.StatusCode != http.StatusOK {
   232  
   233  		if resp.StatusCode == http.StatusBadRequest || // 400
   234  			resp.StatusCode == http.StatusNotFound || // 404
   235  			false {
   236  			dmp := ""
   237  			for k, v := range resp.Header {
   238  				dmp += fmt.Sprintf("key: %v - val %v\n", k, v)
   239  			}
   240  			dmp = ""
   241  			dmp += stringspb.IndentedDump(r.URL)
   242  
   243  			bts, errRd := ioutil.ReadAll(resp.Body)
   244  			if errRd != nil {
   245  				return nil, inf, fmt.Errorf("cannot read resp body: %v", errRd)
   246  			}
   247  			if len(bts) > 2*1024 {
   248  				btsApdx := append([]byte(" ...omitted... "), bts[len(bts)-100:]...)
   249  				bts = append(bts[2*1024:], btsApdx...)
   250  			}
   251  			defer resp.Body.Close()
   252  
   253  			err2 := fmt.Errorf("resp %v: %v \n%v \n<pre>%s</pre>", resp.StatusCode, r.URL.String(), dmp, bts)
   254  
   255  			if r.URL.Path == "" {
   256  				r.URL.Path = "/"
   257  			}
   258  			var err2nd error
   259  			resp, err2nd = client.Do(r)
   260  			if err2nd != nil {
   261  				return nil, inf, fmt.Errorf("again error %v \n%v", err2nd, err2)
   262  			}
   263  			if resp.StatusCode != http.StatusOK {
   264  				inf.Status = resp.StatusCode
   265  				return nil, inf, fmt.Errorf("again Status NotOK %v \n%v", resp.StatusCode, err2)
   266  			}
   267  			log.Printf("successful retry with '/' to %v after %v\n", r.URL.String(), err)
   268  			err = nil // CLEAR error
   269  
   270  			// return nil, inf, err2
   271  
   272  		} else {
   273  			return nil, inf, fmt.Errorf("bad http resp code: %v - %v", resp.StatusCode, r.URL.String())
   274  		}
   275  	}
   276  
   277  	bts, err := ioutil.ReadAll(resp.Body)
   278  	if err != nil {
   279  		return nil, inf, fmt.Errorf("cannot read resp body: %v", err)
   280  	}
   281  	defer resp.Body.Close()
   282  
   283  	// time stamp
   284  	var tlm time.Time // time last modified
   285  	lm := resp.Header.Get("Last-Modified")
   286  	if lm != "" {
   287  		tlm, err = time.Parse(time.RFC1123, lm) // Last-Modified: Sat, 29 Aug 2015 21:15:39 GMT
   288  		if err != nil {
   289  			tlm, err = time.Parse(time.RFC1123Z, lm) // with numeric time zone
   290  			if err != nil {
   291  				var zeroTime time.Time
   292  				tlm = zeroTime
   293  			}
   294  		}
   295  	}
   296  	inf.Mod = tlm
   297  	// log.Printf("    hdr  %v %v\n", lm, tlm.Format(time.ANSIC))
   298  
   299  	return bts, inf, nil
   300  
   301  }
   302  
   303  func addFallBackSuccessInfo(options Options, inf *Info, r *http.Request, err error) {
   304  	if options.LogLevel > 0 {
   305  		inf.Msg += fmt.Sprintf("\tsuccessful fallback to http %v", r.URL.String())
   306  		inf.Msg += fmt.Sprintf("\tafter %v\n", err)
   307  	}
   308  
   309  }