github.com/coreos/goproxy@v0.0.0-20190513173959-f8dc2d7ba04e/ext/html/html.go (about)

     1  // Package goproxy_html is an extension to goproxy that makes it easy to filter web-browser-related content.
     2  package goproxy_html
     3  
     4  import (
     5  	"bytes"
     6  	"errors"
     7  	"io"
     8  	"io/ioutil"
     9  	"net/http"
    10  	"strings"
    11  
    12  	"github.com/rogpeppe/go-charset/charset"
    13  	_ "github.com/rogpeppe/go-charset/data"
    14  	"github.com/elazarl/goproxy"
    15  )
    16  
    17  var IsHtml goproxy.RespCondition = goproxy.ContentTypeIs("text/html")
    18  
    19  var IsCss goproxy.RespCondition = goproxy.ContentTypeIs("text/css")
    20  
    21  var IsJavaScript goproxy.RespCondition = goproxy.ContentTypeIs("text/javascript",
    22  	"application/javascript")
    23  
    24  var IsJson goproxy.RespCondition = goproxy.ContentTypeIs("text/json")
    25  
    26  var IsXml goproxy.RespCondition = goproxy.ContentTypeIs("text/xml")
    27  
    28  var IsWebRelatedText goproxy.RespCondition = goproxy.ContentTypeIs("text/html",
    29  	"text/css",
    30  	"text/javascript", "application/javascript",
    31  	"text/xml",
    32  	"text/json")
    33  
    34  // HandleString will receive a function that filters a string, and will convert the
    35  // request body to a utf8 string, according to the charset specified in the Content-Type
    36  // header.
    37  // guessing Html charset encoding from the <META> tags is not yet implemented.
    38  func HandleString(f func(s string, ctx *goproxy.ProxyCtx) string) goproxy.RespHandler {
    39  	return HandleStringReader(func(r io.Reader, ctx *goproxy.ProxyCtx) io.Reader {
    40  		b, err := ioutil.ReadAll(r)
    41  		if err != nil {
    42  			ctx.Warnf("Cannot read string from resp body: %v", err)
    43  			return r
    44  		}
    45  		return bytes.NewBufferString(f(string(b), ctx))
    46  	})
    47  }
    48  
    49  // Will receive an input stream which would convert the response to utf-8
    50  // The given function must close the reader r, in order to close the response body.
    51  func HandleStringReader(f func(r io.Reader, ctx *goproxy.ProxyCtx) io.Reader) goproxy.RespHandler {
    52  	return goproxy.FuncRespHandler(func(resp *http.Response, ctx *goproxy.ProxyCtx) *http.Response {
    53  		if ctx.Error != nil {
    54  			return nil
    55  		}
    56  		charsetName := ctx.Charset()
    57  		if charsetName == "" {
    58  			charsetName = "utf-8"
    59  		}
    60  
    61  		if strings.ToLower(charsetName) != "utf-8" {
    62  			r, err := charset.NewReader(charsetName, resp.Body)
    63  			if err != nil {
    64  				ctx.Warnf("Cannot convert from %v to utf-8: %v", charsetName, err)
    65  				return resp
    66  			}
    67  			tr, err := charset.TranslatorTo(charsetName)
    68  			if err != nil {
    69  				ctx.Warnf("Can't translate to %v from utf-8: %v", charsetName, err)
    70  				return resp
    71  			}
    72  			if err != nil {
    73  				ctx.Warnf("Cannot translate to %v: %v", charsetName, err)
    74  				return resp
    75  			}
    76  			newr := charset.NewTranslatingReader(f(r, ctx), tr)
    77  			resp.Body = &readFirstCloseBoth{ioutil.NopCloser(newr), resp.Body}
    78  		} else {
    79  			//no translation is needed, already at utf-8
    80  			resp.Body = &readFirstCloseBoth{ioutil.NopCloser(f(resp.Body, ctx)), resp.Body}
    81  		}
    82  		return resp
    83  	})
    84  }
    85  
    86  type readFirstCloseBoth struct {
    87  	r io.ReadCloser
    88  	c io.Closer
    89  }
    90  
    91  func (rfcb *readFirstCloseBoth) Read(b []byte) (nr int, err error) {
    92  	return rfcb.r.Read(b)
    93  }
    94  func (rfcb *readFirstCloseBoth) Close() error {
    95  	err1 := rfcb.r.Close()
    96  	err2 := rfcb.c.Close()
    97  	if err1 != nil && err2 != nil {
    98  		return errors.New(err1.Error() + ", " + err2.Error())
    99  	}
   100  	if err1 != nil {
   101  		return err1
   102  	}
   103  	return err2
   104  }