github.com/grafana/pyroscope@v1.18.0/pkg/util/gziphandler/gzip.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/nytimes/gziphandler/blob/2f8bb1d30d9d69c8e0c3714da5a9917125a87769/gzip.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: Copyright 2016-2017 The New York Times Company.
     5  
     6  package gziphandler
     7  
     8  import (
     9  	"bufio"
    10  	"compress/gzip"
    11  	"fmt"
    12  	"io"
    13  	"mime"
    14  	"net"
    15  	"net/http"
    16  	"strconv"
    17  	"strings"
    18  	"sync"
    19  )
    20  
    21  const (
    22  	vary            = "Vary"
    23  	acceptEncoding  = "Accept-Encoding"
    24  	contentEncoding = "Content-Encoding"
    25  	contentType     = "Content-Type"
    26  	contentLength   = "Content-Length"
    27  )
    28  
    29  type codings map[string]float64
    30  
    31  const (
    32  	// DefaultQValue is the default qvalue to assign to an encoding if no explicit qvalue is set.
    33  	// This is actually kind of ambiguous in RFC 2616, so hopefully it's correct.
    34  	// The examples seem to indicate that it is.
    35  	DefaultQValue = 1.0
    36  
    37  	// DefaultMinSize is the default minimum size until we enable gzip compression.
    38  	// 1500 bytes is the MTU size for the internet since that is the largest size allowed at the network layer.
    39  	// If you take a file that is 1300 bytes and compress it to 800 bytes, it’s still transmitted in that same 1500 byte packet regardless, so you’ve gained nothing.
    40  	// That being the case, you should restrict the gzip compression to files with a size greater than a single packet, 1400 bytes (1.4KB) is a safe value.
    41  	DefaultMinSize = 1400
    42  )
    43  
    44  // gzipWriterPools stores a sync.Pool for each compression level for reuse of
    45  // gzip.Writers. Use poolIndex to covert a compression level to an index into
    46  // gzipWriterPools.
    47  var gzipWriterPools [gzip.BestCompression - gzip.BestSpeed + 2]*sync.Pool
    48  
    49  func init() {
    50  	for i := gzip.BestSpeed; i <= gzip.BestCompression; i++ {
    51  		addLevelPool(i)
    52  	}
    53  	addLevelPool(gzip.DefaultCompression)
    54  }
    55  
    56  // poolIndex maps a compression level to its index into gzipWriterPools. It
    57  // assumes that level is a valid gzip compression level.
    58  func poolIndex(level int) int {
    59  	// gzip.DefaultCompression == -1, so we need to treat it special.
    60  	if level == gzip.DefaultCompression {
    61  		return gzip.BestCompression - gzip.BestSpeed + 1
    62  	}
    63  	return level - gzip.BestSpeed
    64  }
    65  
    66  func addLevelPool(level int) {
    67  	gzipWriterPools[poolIndex(level)] = &sync.Pool{
    68  		New: func() interface{} {
    69  			// NewWriterLevel only returns error on a bad level, we are guaranteeing
    70  			// that this will be a valid level so it is okay to ignore the returned
    71  			// error.
    72  			w, _ := gzip.NewWriterLevel(nil, level)
    73  			return w
    74  		},
    75  	}
    76  }
    77  
    78  // GzipResponseWriter provides an http.ResponseWriter interface, which gzips
    79  // bytes before writing them to the underlying response. This doesn't close the
    80  // writers, so don't forget to do that.
    81  // It can be configured to skip response smaller than minSize.
    82  type GzipResponseWriter struct {
    83  	http.ResponseWriter
    84  	index int // Index for gzipWriterPools.
    85  	gw    *gzip.Writer
    86  
    87  	code int // Saves the WriteHeader value.
    88  
    89  	minSize int    // Specifies the minimum response size to gzip. If the response length is bigger than this value, it is compressed.
    90  	buf     []byte // Holds the first part of the write before reaching the minSize or the end of the write.
    91  	ignore  bool   // If true, then we immediately passthru writes to the underlying ResponseWriter.
    92  
    93  	contentTypes    []parsedContentType // Only compress if the response is one of these content-types. All are accepted if empty.
    94  	rejectsIdentity bool                // If true, then request explicitly rejected non-encoded requests.
    95  }
    96  
    97  // Write appends data to the gzip writer.
    98  func (w *GzipResponseWriter) Write(b []byte) (int, error) {
    99  	// GZIP responseWriter is initialized. Use the GZIP responseWriter.
   100  	if w.gw != nil {
   101  		return w.gw.Write(b)
   102  	}
   103  
   104  	// If we have already decided not to use GZIP, immediately passthrough.
   105  	if w.ignore {
   106  		return w.ResponseWriter.Write(b)
   107  	}
   108  
   109  	// Save the write into a buffer for later use in GZIP responseWriter (if content is long enough) or at close with regular responseWriter.
   110  	// On the first write, w.buf changes from nil to a valid slice
   111  	w.buf = append(w.buf, b...)
   112  
   113  	var (
   114  		cl, _ = strconv.Atoi(w.Header().Get(contentLength))
   115  		ct    = w.Header().Get(contentType)
   116  		ce    = w.Header().Get(contentEncoding)
   117  	)
   118  
   119  	// Don't encode again encoded content.
   120  	// There's no need to check whether the client rejected the identity encoding
   121  	// because we already know that this has a different encoding.
   122  	if ce != "" {
   123  		return w.startPlainWrite(len(b))
   124  	}
   125  
   126  	// Don't encode when content length is known, it's less than min size,
   127  	// and the caller didn't reject the identity encoding.
   128  	if cl > 0 && cl < w.minSize && !w.rejectsIdentity {
   129  		return w.startPlainWrite(len(b))
   130  	}
   131  
   132  	// Only continue if we handle the content type (or it's still unknown).
   133  	if handleContentType(w.contentTypes, ct) {
   134  		// If the current buffer is less than minSize and a Content-Length isn't set, then wait until we have more data.
   135  		if len(w.buf) < w.minSize && cl == 0 && !w.rejectsIdentity {
   136  			return len(b), nil
   137  		}
   138  		// If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
   139  		if cl >= w.minSize || len(w.buf) >= w.minSize || w.rejectsIdentity {
   140  			// If a Content-Type wasn't specified, infer it from the current buffer.
   141  			if ct == "" {
   142  				ct = http.DetectContentType(w.buf)
   143  				w.Header().Set(contentType, ct)
   144  			}
   145  			// If the Content-Type is acceptable to GZIP, initialize the GZIP writer.
   146  			// Note that we're ignoring the `rejectsIdentity` here, because we'd have to return a 406 Not Acceptable
   147  			// in that case, but we still might be wrapped by another handler that handles a different encoding.
   148  			if handleContentType(w.contentTypes, ct) {
   149  				if err := w.startGzip(); err != nil {
   150  					return 0, err
   151  				}
   152  				return len(b), nil
   153  			}
   154  		}
   155  	}
   156  
   157  	// If we got here, we should not GZIP this response.
   158  	return w.startPlainWrite(len(b))
   159  }
   160  
   161  func (w *GzipResponseWriter) startPlainWrite(blen int) (int, error) {
   162  	if err := w.startPlain(); err != nil {
   163  		return 0, err
   164  	}
   165  	return blen, nil
   166  }
   167  
   168  // startGzip initializes a GZIP writer and writes the buffer.
   169  func (w *GzipResponseWriter) startGzip() error {
   170  	// Set the GZIP header.
   171  	w.Header().Set(contentEncoding, "gzip")
   172  
   173  	// if the Content-Length is already set, then calls to Write on gzip
   174  	// will fail to set the Content-Length header since its already set
   175  	// See: https://github.com/golang/go/issues/14975.
   176  	w.Header().Del(contentLength)
   177  
   178  	// Write the header to gzip response.
   179  	if w.code != 0 {
   180  		w.ResponseWriter.WriteHeader(w.code)
   181  		// Ensure that no other WriteHeader's happen
   182  		w.code = 0
   183  	}
   184  
   185  	// Initialize and flush the buffer into the gzip response if there are any bytes.
   186  	// If there aren't any, we shouldn't initialize it yet because on Close it will
   187  	// write the gzip header even if nothing was ever written.
   188  	if len(w.buf) > 0 {
   189  		// Initialize the GZIP response.
   190  		w.init()
   191  		n, err := w.gw.Write(w.buf)
   192  
   193  		// This should never happen (per io.Writer docs), but if the write didn't
   194  		// accept the entire buffer but returned no specific error, we have no clue
   195  		// what's going on, so abort just to be safe.
   196  		if err == nil && n < len(w.buf) {
   197  			err = io.ErrShortWrite
   198  		}
   199  		return err
   200  	}
   201  	return nil
   202  }
   203  
   204  // startPlain writes to sent bytes and buffer the underlying ResponseWriter without gzip.
   205  func (w *GzipResponseWriter) startPlain() error {
   206  	if w.code != 0 {
   207  		w.ResponseWriter.WriteHeader(w.code)
   208  		// Ensure that no other WriteHeader's happen
   209  		w.code = 0
   210  	}
   211  	w.ignore = true
   212  	// If Write was never called then don't call Write on the underlying ResponseWriter.
   213  	if w.buf == nil {
   214  		return nil
   215  	}
   216  	n, err := w.ResponseWriter.Write(w.buf)
   217  	w.buf = nil
   218  	// This should never happen (per io.Writer docs), but if the write didn't
   219  	// accept the entire buffer but returned no specific error, we have no clue
   220  	// what's going on, so abort just to be safe.
   221  	if err == nil && n < len(w.buf) {
   222  		err = io.ErrShortWrite
   223  	}
   224  	return err
   225  }
   226  
   227  // WriteHeader just saves the response code until close or GZIP effective writes.
   228  func (w *GzipResponseWriter) WriteHeader(code int) {
   229  	if w.code == 0 {
   230  		w.code = code
   231  	}
   232  }
   233  
   234  // init graps a new gzip writer from the gzipWriterPool and writes the correct
   235  // content encoding header.
   236  func (w *GzipResponseWriter) init() {
   237  	// Bytes written during ServeHTTP are redirected to this gzip writer
   238  	// before being written to the underlying response.
   239  	gzw := gzipWriterPools[w.index].Get().(*gzip.Writer)
   240  	gzw.Reset(w.ResponseWriter)
   241  	w.gw = gzw
   242  }
   243  
   244  // Close will close the gzip.Writer and will put it back in the gzipWriterPool.
   245  func (w *GzipResponseWriter) Close() error {
   246  	if w.ignore {
   247  		return nil
   248  	}
   249  
   250  	if w.gw == nil {
   251  		// GZIP not triggered yet, write out regular response.
   252  		err := w.startPlain()
   253  		// Returns the error if any at write.
   254  		if err != nil {
   255  			err = fmt.Errorf("gziphandler: write to regular responseWriter at close gets error: %q", err.Error())
   256  		}
   257  		return err
   258  	}
   259  
   260  	err := w.gw.Close()
   261  	gzipWriterPools[w.index].Put(w.gw)
   262  	w.gw = nil
   263  	return err
   264  }
   265  
   266  // Flush flushes the underlying *gzip.Writer and then the underlying
   267  // http.ResponseWriter if it is an http.Flusher. This makes GzipResponseWriter
   268  // an http.Flusher.
   269  func (w *GzipResponseWriter) Flush() {
   270  	if w.gw == nil && !w.ignore {
   271  		// Only flush once startGzip or startPlain has been called.
   272  		//
   273  		// Flush is thus a no-op until we're certain whether a plain
   274  		// or gzipped response will be served.
   275  		return
   276  	}
   277  
   278  	if w.gw != nil {
   279  		w.gw.Flush()
   280  	}
   281  
   282  	if fw, ok := w.ResponseWriter.(http.Flusher); ok {
   283  		fw.Flush()
   284  	}
   285  }
   286  
   287  // Hijack implements http.Hijacker. If the underlying ResponseWriter is a
   288  // Hijacker, its Hijack method is returned. Otherwise an error is returned.
   289  func (w *GzipResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
   290  	if hj, ok := w.ResponseWriter.(http.Hijacker); ok {
   291  		return hj.Hijack()
   292  	}
   293  	return nil, nil, fmt.Errorf("http.Hijacker interface is not supported")
   294  }
   295  
   296  // verify Hijacker interface implementation
   297  var _ http.Hijacker = &GzipResponseWriter{}
   298  
   299  // MustNewGzipLevelHandler behaves just like NewGzipLevelHandler except that in
   300  // an error case it panics rather than returning an error.
   301  func MustNewGzipLevelHandler(level int) func(http.Handler) http.Handler {
   302  	wrap, err := NewGzipLevelHandler(level)
   303  	if err != nil {
   304  		panic(err)
   305  	}
   306  	return wrap
   307  }
   308  
   309  // NewGzipLevelHandler returns a wrapper function (often known as middleware)
   310  // which can be used to wrap an HTTP handler to transparently gzip the response
   311  // body if the client supports it (via the Accept-Encoding header). Responses will
   312  // be encoded at the given gzip compression level. An error will be returned only
   313  // if an invalid gzip compression level is given, so if one can ensure the level
   314  // is valid, the returned error can be safely ignored.
   315  func NewGzipLevelHandler(level int) (func(http.Handler) http.Handler, error) {
   316  	return NewGzipLevelAndMinSize(level, DefaultMinSize)
   317  }
   318  
   319  // NewGzipLevelAndMinSize behave as NewGzipLevelHandler except it let the caller
   320  // specify the minimum size before compression.
   321  func NewGzipLevelAndMinSize(level, minSize int) (func(http.Handler) http.Handler, error) {
   322  	return GzipHandlerWithOpts(CompressionLevel(level), MinSize(minSize))
   323  }
   324  
   325  // GzipHandlerWithOpts creates a middleware that wraps http.Handler with GzipHandler, configured with provided options.
   326  //
   327  //nolint:revive
   328  func GzipHandlerWithOpts(opts ...Option) (func(http.Handler) http.Handler, error) {
   329  	c := &config{
   330  		level:   gzip.DefaultCompression,
   331  		minSize: DefaultMinSize,
   332  	}
   333  
   334  	for _, o := range opts {
   335  		o(c)
   336  	}
   337  
   338  	if err := c.validate(); err != nil {
   339  		return nil, err
   340  	}
   341  
   342  	return func(h http.Handler) http.Handler {
   343  		index := poolIndex(c.level)
   344  
   345  		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   346  			w.Header().Add(vary, acceptEncoding)
   347  			if acceptsGzip, rejectsIdentity := requestAcceptance(r); acceptsGzip {
   348  				gw := &GzipResponseWriter{
   349  					ResponseWriter:  w,
   350  					index:           index,
   351  					minSize:         c.minSize,
   352  					contentTypes:    c.contentTypes,
   353  					rejectsIdentity: rejectsIdentity,
   354  				}
   355  				defer gw.Close()
   356  
   357  				h.ServeHTTP(gw, r)
   358  
   359  			} else {
   360  				h.ServeHTTP(w, r)
   361  			}
   362  		})
   363  	}, nil
   364  }
   365  
   366  // Parsed representation of one of the inputs to ContentTypes.
   367  // See https://golang.org/pkg/mime/#ParseMediaType
   368  type parsedContentType struct {
   369  	mediaType string
   370  	params    map[string]string
   371  }
   372  
   373  // equals returns whether this content type matches another content type.
   374  func (pct parsedContentType) equals(mediaType string, params map[string]string) bool {
   375  	if pct.mediaType != mediaType {
   376  		return false
   377  	}
   378  	// if pct has no params, don't care about other's params
   379  	if len(pct.params) == 0 {
   380  		return true
   381  	}
   382  
   383  	// if pct has any params, they must be identical to other's.
   384  	if len(pct.params) != len(params) {
   385  		return false
   386  	}
   387  	for k, v := range pct.params {
   388  		if w, ok := params[k]; !ok || v != w {
   389  			return false
   390  		}
   391  	}
   392  	return true
   393  }
   394  
   395  // Used for functional configuration.
   396  type config struct {
   397  	minSize      int
   398  	level        int
   399  	contentTypes []parsedContentType
   400  }
   401  
   402  func (c *config) validate() error {
   403  	if c.level != gzip.DefaultCompression && (c.level < gzip.BestSpeed || c.level > gzip.BestCompression) {
   404  		return fmt.Errorf("invalid compression level requested: %d", c.level)
   405  	}
   406  
   407  	if c.minSize < 0 {
   408  		return fmt.Errorf("minimum size must be more than zero")
   409  	}
   410  
   411  	return nil
   412  }
   413  
   414  type Option func(c *config)
   415  
   416  func MinSize(size int) Option {
   417  	return func(c *config) {
   418  		c.minSize = size
   419  	}
   420  }
   421  
   422  func CompressionLevel(level int) Option {
   423  	return func(c *config) {
   424  		c.level = level
   425  	}
   426  }
   427  
   428  // ContentTypes specifies a list of content types to compare
   429  // the Content-Type header to before compressing. If none
   430  // match, the response will be returned as-is.
   431  //
   432  // Content types are compared in a case-insensitive, whitespace-ignored
   433  // manner.
   434  //
   435  // A MIME type without any other directive will match a content type
   436  // that has the same MIME type, regardless of that content type's other
   437  // directives. I.e., "text/html" will match both "text/html" and
   438  // "text/html; charset=utf-8".
   439  //
   440  // A MIME type with any other directive will only match a content type
   441  // that has the same MIME type and other directives. I.e.,
   442  // "text/html; charset=utf-8" will only match "text/html; charset=utf-8".
   443  //
   444  // By default, responses are gzipped regardless of
   445  // Content-Type.
   446  func ContentTypes(types []string) Option {
   447  	return func(c *config) {
   448  		c.contentTypes = []parsedContentType{}
   449  		for _, v := range types {
   450  			mediaType, params, err := mime.ParseMediaType(v)
   451  			if err == nil {
   452  				c.contentTypes = append(c.contentTypes, parsedContentType{mediaType, params})
   453  			}
   454  		}
   455  	}
   456  }
   457  
   458  // GzipHandler wraps an HTTP handler, to transparently gzip the response body if
   459  // the client supports it (via the Accept-Encoding header). This will compress at
   460  // the default compression level.
   461  func GzipHandler(h http.Handler) http.Handler {
   462  	wrapper, _ := NewGzipLevelHandler(gzip.DefaultCompression)
   463  	return wrapper(h)
   464  }
   465  
   466  // requestAcceptance checks whether a given HTTP request indicates that it will
   467  // accept a gzipped response and whether it's going to reject an non-encoded response.
   468  //
   469  // acceptsGzip is true if the given HTTP request indicates that it will
   470  // accept a gzipped response and/or an identity request.
   471  // rejectsIdentity is false if the given HTTP request didn't explicitly exclude identity encoding.
   472  // I.e., either "identity;q=0" or "*;q=0" without a more specific entry for "identity".
   473  // See https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.4
   474  func requestAcceptance(r *http.Request) (acceptsGzip bool, rejectsIdentity bool) {
   475  	acceptedEncodings, _ := parseEncodings(r.Header.Get(acceptEncoding))
   476  
   477  	identity, iset := acceptedEncodings["identity"]
   478  	wildcard, wset := acceptedEncodings["*"]
   479  	rejectsIdentity = (iset && identity == 0) || (!iset && wset && wildcard == 0)
   480  
   481  	gzip, gzset := acceptedEncodings["gzip"]
   482  	acceptsGzip = gzip > 0 || (!gzset && wildcard > 0)
   483  
   484  	return acceptsGzip, rejectsIdentity
   485  }
   486  
   487  // returns true if we've been configured to compress the specific content type.
   488  func handleContentType(contentTypes []parsedContentType, ct string) bool {
   489  	// If unknown, then handle by default.
   490  	if ct == "" {
   491  		return true
   492  	}
   493  
   494  	// If contentTypes is empty we handle all content types.
   495  	if len(contentTypes) == 0 {
   496  		return true
   497  	}
   498  
   499  	mediaType, params, err := mime.ParseMediaType(ct)
   500  	if err != nil {
   501  		return false
   502  	}
   503  
   504  	for _, c := range contentTypes {
   505  		if c.equals(mediaType, params) {
   506  			return true
   507  		}
   508  	}
   509  
   510  	return false
   511  }
   512  
   513  // parseEncodings attempts to parse a list of codings, per RFC 2616, as might
   514  // appear in an Accept-Encoding header. It returns a map of content-codings to
   515  // quality values, and an error containing the errors encountered. It's probably
   516  // safe to ignore those, because silently ignoring errors is how the internet
   517  // works.
   518  //
   519  // See: http://tools.ietf.org/html/rfc2616#section-14.3.
   520  func parseEncodings(s string) (codings, error) {
   521  	c := make(codings)
   522  	var e []string
   523  	var ss string
   524  	var found bool
   525  
   526  	for {
   527  		ss, s, found = strings.Cut(s, ",")
   528  		coding, qvalue, err := parseCoding(ss)
   529  
   530  		if err != nil {
   531  			e = append(e, err.Error())
   532  		} else {
   533  			c[coding] = qvalue
   534  		}
   535  
   536  		if !found {
   537  			break
   538  		}
   539  	}
   540  
   541  	// TODO (adammck): Use a proper multi-error struct, so the individual errors
   542  	//                 can be extracted if anyone cares.
   543  	if len(e) > 0 {
   544  		return c, fmt.Errorf("errors while parsing encodings: %s", strings.Join(e, ", "))
   545  	}
   546  
   547  	return c, nil
   548  }
   549  
   550  // parseCoding parses a single conding (content-coding with an optional qvalue),
   551  // as might appear in an Accept-Encoding header. It attempts to forgive minor
   552  // formatting errors.
   553  func parseCoding(s string) (coding string, qvalue float64, err error) {
   554  	for n, part := range strings.Split(s, ";") {
   555  		part = strings.TrimSpace(part)
   556  		qvalue = DefaultQValue
   557  
   558  		if n == 0 {
   559  			coding = strings.ToLower(part)
   560  		} else if strings.HasPrefix(part, "q=") {
   561  			qvalue, err = strconv.ParseFloat(strings.TrimPrefix(part, "q="), 64)
   562  
   563  			if qvalue < 0.0 {
   564  				qvalue = 0.0
   565  			} else if qvalue > 1.0 {
   566  				qvalue = 1.0
   567  			}
   568  		}
   569  	}
   570  
   571  	if coding == "" {
   572  		err = fmt.Errorf("empty content-coding")
   573  	}
   574  
   575  	return
   576  }