github.com/pbberlin/tools@v0.0.0-20160910141205-7aa5421c2169/net/http/proxy1/fetch_and_display.go (about)

// Package proxy1 forwards HTML pages while simplifying their DOM structure.
// It is a wrapper around domclean2, which performs the actual cleansing and
// proxification, and it contains tamper-monkey JavaScript popup code.
     4  package proxy1
     5  
     6  import (
     7  	"bytes"
     8  	"fmt"
     9  	"mime"
    10  	"net/http"
    11  	"path"
    12  	"strings"
    13  
    14  	_ "html"
    15  
    16  	"github.com/pbberlin/tools/appengine/util_appengine"
    17  	"github.com/pbberlin/tools/net/http/domclean2"
    18  	"github.com/pbberlin/tools/net/http/fetch"
    19  	"github.com/pbberlin/tools/net/http/loghttp"
    20  	"github.com/pbberlin/tools/net/http/routes"
    21  	"github.com/pbberlin/tools/net/http/tplx"
    22  	"golang.org/x/net/html"
    23  )
    24  
    25  var insertNewlines = strings.NewReplacer(
    26  	"<head", "\n<head",
    27  	"</head>", "</head>\n",
    28  	"<meta", "\n<meta",
    29  	"</script>", "</script>\n",
    30  	"</style>", "</style>\n",
    31  	"</div>", "</div>\n",
    32  	"<style", "\n<style",
    33  	"<script", "\n<script")
    34  
    35  var replTabsNewline = strings.NewReplacer("\n", " ", "\t", " ")
    36  var undouble = strings.NewReplacer("\n\n\n", "\n", "\n\n", "\n")
    37  
    38  const c_formFetchUrl = `
    39  
    40  	<style> .ib { display:inline-block; }</style>
    41  	<form action="{{.protocol}}://{{.host}}{{.path}}" method="post" >
    42  	  <div style='margin:8px;'>
    43  
    44  	  	<span class='ib' style='width:99%'>
    45  	  		URL<br />
    46  	  		<input id='i1' name="{{.name}}"	style='width:90%;height:96px;'
    47  	  			size="80"  
    48  	  	  		xxvalue="{{.val}}"
    49  	  	  		value=""
    50  	  		>
    51  	  	 </span>
    52  
    53    		<br/>
    54  
    55  		<span class='ib' style='width:99%'>&nbsp;</span>
    56  		  <a href='#' onclick='document.getElementById("i1").value=""' style='font-size:42px;' 
    57  		  >Clear</a>
    58  		</span>
    59  
    60    		<br/>
    61    		<br/>
    62    		<br/>
    63  
    64  		<span class='ib' style='width:99%'>
    65  			Put into pre tags 
    66  			<input name="renderInPre"	size="4"	value='' ><br/>
    67  		</span>
    68  
    69    		<br/>
    70  	
    71  		<input type="submit" value="Fetch" accesskey='f'  style='width:90%;height:96px;'>
    72  
    73  	</div>
    74  	</form>
    75  
    76  `
    77  
    78  // handleFetchURL either displays a form for requesting an url
    79  // or it returns the URL“s contents.
    80  func handleFetchURL(w http.ResponseWriter, r *http.Request, m map[string]interface{}) {
    81  
    82  	lg, b := loghttp.BuffLoggerUniversal(w, r)
    83  	_ = b
    84  
    85  	// on live server => always use https
    86  	if r.URL.Scheme != "https" && !util_appengine.IsLocalEnviron() {
    87  		r.URL.Scheme = "https"
    88  		r.URL.Host = r.Host
    89  		lg("lo - redirect %v", r.URL.String())
    90  		http.Redirect(w, r, r.URL.String(), http.StatusFound)
    91  	}
    92  
    93  	/*
    94  		To distinguish between posted and getted value,
    95  		we check the "post-only" slice of values first.
    96  		If nothing's there, but FormValue *has* a value,
    97  		then it was "getted", otherwise "posted"
    98  	*/
    99  	rURL := ""
   100  	urlAs := ""
   101  	err := r.ParseForm()
   102  	lg(err)
   103  	if r.PostFormValue(routes.URLParamKey) != "" {
   104  		urlAs += "url posted "
   105  		rURL = r.PostFormValue(routes.URLParamKey)
   106  	}
   107  
   108  	if r.FormValue(routes.URLParamKey) != "" {
   109  		if rURL == "" {
   110  			urlAs += "url getted "
   111  			rURL = r.FormValue(routes.URLParamKey)
   112  		}
   113  	}
   114  	// lg("received %v:  %q", urlAs, rURL)
   115  
   116  	if len(rURL) == 0 {
   117  
   118  		tplAdder, tplExec := tplx.FuncTplBuilder(w, r)
   119  		tplAdder("n_html_title", "Fetch some http data", nil)
   120  
   121  		m := map[string]string{
   122  			"protocol": "https",
   123  			"host":     r.Host, // not  fetch.HostFromReq(r)
   124  			"path":     routes.ProxifyURI,
   125  			"name":     routes.URLParamKey,
   126  			"val":      "google.com",
   127  		}
   128  		if util_appengine.IsLocalEnviron() {
   129  			m["protocol"] = "http"
   130  		}
   131  		tplAdder("n_cont_0", c_formFetchUrl, m)
   132  		tplExec(w, r)
   133  
   134  	} else {
   135  
   136  		r.Header.Set("X-Custom-Header-Counter", "nocounter")
   137  
   138  		bts, inf, err := fetch.UrlGetter(r, fetch.Options{URL: rURL})
   139  		lg(err)
   140  
   141  		tp := mime.TypeByExtension(path.Ext(inf.URL.Path))
   142  		if false {
   143  			ext := path.Ext(rURL)
   144  			ext = strings.ToLower(ext)
   145  			tp = mime.TypeByExtension(ext)
   146  		}
   147  		w.Header().Set("Content-Type", tp)
   148  		// w.Header().Set("Content-type", "text/html; charset=latin-1")
   149  
   150  		if r.FormValue("dbg") != "" {
   151  			w.Header().Set("Content-type", "text/html; charset=utf-8")
   152  			fmt.Fprintf(w, "%s<br>\n  %s<br>\n %v", inf.URL.Path, tp, inf.URL.String())
   153  			return
   154  		}
   155  
   156  		opts := domclean2.CleaningOptions{Proxify: true}
   157  		opts.Beautify = true // "<a> Linktext without trailing space"
   158  		opts.RemoteHost = fetch.HostFromStringUrl(rURL)
   159  
   160  		// opts.ProxyHost = routes.AppHost()
   161  		opts.ProxyHost = fetch.HostFromReq(r)
   162  		if !util_appengine.IsLocalEnviron() {
   163  			opts.ProxyHost = fetch.HostFromReq(r)
   164  		}
   165  
   166  		doc, err := domclean2.DomClean(bts, opts)
   167  
   168  		var bufRend bytes.Buffer
   169  		err = html.Render(&bufRend, doc)
   170  		lg(err)
   171  		w.Write(bufRend.Bytes())
   172  
   173  	}
   174  
   175  }
   176  
   177  func init() {
   178  	http.HandleFunc(routes.ProxifyURI, loghttp.Adapter(handleFetchURL))
   179  }