// github.com/grafana/pyroscope@v1.18.0/pkg/util/gziphandler/gzip.go
//
// SPDX-License-Identifier: AGPL-3.0-only
// Provenance-includes-location: https://github.com/nytimes/gziphandler/blob/2f8bb1d30d9d69c8e0c3714da5a9917125a87769/gzip.go
// Provenance-includes-license: Apache-2.0
// Provenance-includes-copyright: Copyright 2016-2017 The New York Times Company.

package gziphandler

import (
	"bufio"
	"compress/gzip"
	"fmt"
	"io"
	"mime"
	"net"
	"net/http"
	"strconv"
	"strings"
	"sync"
)

// Names of the HTTP headers this package reads or writes.
const (
	vary            = "Vary"
	acceptEncoding  = "Accept-Encoding"
	contentEncoding = "Content-Encoding"
	contentType     = "Content-Type"
	contentLength   = "Content-Length"
)

// codings maps a content-coding name, as listed in an Accept-Encoding header,
// to its quality value.
type codings map[string]float64

const (
	// DefaultQValue is the default qvalue to assign to an encoding if no explicit qvalue is set.
	// This is actually kind of ambiguous in RFC 2616, so hopefully it's correct.
	// The examples seem to indicate that it is.
	DefaultQValue = 1.0

	// DefaultMinSize is the default minimum size until we enable gzip compression.
	// 1500 bytes is the MTU size for the internet since that is the largest size allowed at the network layer.
	// If you take a file that is 1300 bytes and compress it to 800 bytes, it's still transmitted in that same 1500 byte packet regardless, so you've gained nothing.
	// That being the case, you should restrict the gzip compression to files with a size greater than a single packet, 1400 bytes (1.4KB) is a safe value.
	DefaultMinSize = 1400
)

// gzipWriterPools stores a sync.Pool for each compression level for reuse of
// gzip.Writers. Use poolIndex to convert a compression level to an index into
// gzipWriterPools.
47 var gzipWriterPools [gzip.BestCompression - gzip.BestSpeed + 2]*sync.Pool 48 49 func init() { 50 for i := gzip.BestSpeed; i <= gzip.BestCompression; i++ { 51 addLevelPool(i) 52 } 53 addLevelPool(gzip.DefaultCompression) 54 } 55 56 // poolIndex maps a compression level to its index into gzipWriterPools. It 57 // assumes that level is a valid gzip compression level. 58 func poolIndex(level int) int { 59 // gzip.DefaultCompression == -1, so we need to treat it special. 60 if level == gzip.DefaultCompression { 61 return gzip.BestCompression - gzip.BestSpeed + 1 62 } 63 return level - gzip.BestSpeed 64 } 65 66 func addLevelPool(level int) { 67 gzipWriterPools[poolIndex(level)] = &sync.Pool{ 68 New: func() interface{} { 69 // NewWriterLevel only returns error on a bad level, we are guaranteeing 70 // that this will be a valid level so it is okay to ignore the returned 71 // error. 72 w, _ := gzip.NewWriterLevel(nil, level) 73 return w 74 }, 75 } 76 } 77 78 // GzipResponseWriter provides an http.ResponseWriter interface, which gzips 79 // bytes before writing them to the underlying response. This doesn't close the 80 // writers, so don't forget to do that. 81 // It can be configured to skip response smaller than minSize. 82 type GzipResponseWriter struct { 83 http.ResponseWriter 84 index int // Index for gzipWriterPools. 85 gw *gzip.Writer 86 87 code int // Saves the WriteHeader value. 88 89 minSize int // Specifies the minimum response size to gzip. If the response length is bigger than this value, it is compressed. 90 buf []byte // Holds the first part of the write before reaching the minSize or the end of the write. 91 ignore bool // If true, then we immediately passthru writes to the underlying ResponseWriter. 92 93 contentTypes []parsedContentType // Only compress if the response is one of these content-types. All are accepted if empty. 94 rejectsIdentity bool // If true, then request explicitly rejected non-encoded requests. 
95 } 96 97 // Write appends data to the gzip writer. 98 func (w *GzipResponseWriter) Write(b []byte) (int, error) { 99 // GZIP responseWriter is initialized. Use the GZIP responseWriter. 100 if w.gw != nil { 101 return w.gw.Write(b) 102 } 103 104 // If we have already decided not to use GZIP, immediately passthrough. 105 if w.ignore { 106 return w.ResponseWriter.Write(b) 107 } 108 109 // Save the write into a buffer for later use in GZIP responseWriter (if content is long enough) or at close with regular responseWriter. 110 // On the first write, w.buf changes from nil to a valid slice 111 w.buf = append(w.buf, b...) 112 113 var ( 114 cl, _ = strconv.Atoi(w.Header().Get(contentLength)) 115 ct = w.Header().Get(contentType) 116 ce = w.Header().Get(contentEncoding) 117 ) 118 119 // Don't encode again encoded content. 120 // There's no need to check whether the client rejected the identity encoding 121 // because we already know that this has a different encoding. 122 if ce != "" { 123 return w.startPlainWrite(len(b)) 124 } 125 126 // Don't encode when content length is known, it's less than min size, 127 // and the caller didn't reject the identity encoding. 128 if cl > 0 && cl < w.minSize && !w.rejectsIdentity { 129 return w.startPlainWrite(len(b)) 130 } 131 132 // Only continue if we handle the content type (or it's still unknown). 133 if handleContentType(w.contentTypes, ct) { 134 // If the current buffer is less than minSize and a Content-Length isn't set, then wait until we have more data. 135 if len(w.buf) < w.minSize && cl == 0 && !w.rejectsIdentity { 136 return len(b), nil 137 } 138 // If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue. 139 if cl >= w.minSize || len(w.buf) >= w.minSize || w.rejectsIdentity { 140 // If a Content-Type wasn't specified, infer it from the current buffer. 
141 if ct == "" { 142 ct = http.DetectContentType(w.buf) 143 w.Header().Set(contentType, ct) 144 } 145 // If the Content-Type is acceptable to GZIP, initialize the GZIP writer. 146 // Note that we're ignoring the `rejectsIdentity` here, because we'd have to return a 406 Not Acceptable 147 // in that case, but we still might be wrapped by another handler that handles a different encoding. 148 if handleContentType(w.contentTypes, ct) { 149 if err := w.startGzip(); err != nil { 150 return 0, err 151 } 152 return len(b), nil 153 } 154 } 155 } 156 157 // If we got here, we should not GZIP this response. 158 return w.startPlainWrite(len(b)) 159 } 160 161 func (w *GzipResponseWriter) startPlainWrite(blen int) (int, error) { 162 if err := w.startPlain(); err != nil { 163 return 0, err 164 } 165 return blen, nil 166 } 167 168 // startGzip initializes a GZIP writer and writes the buffer. 169 func (w *GzipResponseWriter) startGzip() error { 170 // Set the GZIP header. 171 w.Header().Set(contentEncoding, "gzip") 172 173 // if the Content-Length is already set, then calls to Write on gzip 174 // will fail to set the Content-Length header since its already set 175 // See: https://github.com/golang/go/issues/14975. 176 w.Header().Del(contentLength) 177 178 // Write the header to gzip response. 179 if w.code != 0 { 180 w.ResponseWriter.WriteHeader(w.code) 181 // Ensure that no other WriteHeader's happen 182 w.code = 0 183 } 184 185 // Initialize and flush the buffer into the gzip response if there are any bytes. 186 // If there aren't any, we shouldn't initialize it yet because on Close it will 187 // write the gzip header even if nothing was ever written. 188 if len(w.buf) > 0 { 189 // Initialize the GZIP response. 190 w.init() 191 n, err := w.gw.Write(w.buf) 192 193 // This should never happen (per io.Writer docs), but if the write didn't 194 // accept the entire buffer but returned no specific error, we have no clue 195 // what's going on, so abort just to be safe. 
196 if err == nil && n < len(w.buf) { 197 err = io.ErrShortWrite 198 } 199 return err 200 } 201 return nil 202 } 203 204 // startPlain writes to sent bytes and buffer the underlying ResponseWriter without gzip. 205 func (w *GzipResponseWriter) startPlain() error { 206 if w.code != 0 { 207 w.ResponseWriter.WriteHeader(w.code) 208 // Ensure that no other WriteHeader's happen 209 w.code = 0 210 } 211 w.ignore = true 212 // If Write was never called then don't call Write on the underlying ResponseWriter. 213 if w.buf == nil { 214 return nil 215 } 216 n, err := w.ResponseWriter.Write(w.buf) 217 w.buf = nil 218 // This should never happen (per io.Writer docs), but if the write didn't 219 // accept the entire buffer but returned no specific error, we have no clue 220 // what's going on, so abort just to be safe. 221 if err == nil && n < len(w.buf) { 222 err = io.ErrShortWrite 223 } 224 return err 225 } 226 227 // WriteHeader just saves the response code until close or GZIP effective writes. 228 func (w *GzipResponseWriter) WriteHeader(code int) { 229 if w.code == 0 { 230 w.code = code 231 } 232 } 233 234 // init graps a new gzip writer from the gzipWriterPool and writes the correct 235 // content encoding header. 236 func (w *GzipResponseWriter) init() { 237 // Bytes written during ServeHTTP are redirected to this gzip writer 238 // before being written to the underlying response. 239 gzw := gzipWriterPools[w.index].Get().(*gzip.Writer) 240 gzw.Reset(w.ResponseWriter) 241 w.gw = gzw 242 } 243 244 // Close will close the gzip.Writer and will put it back in the gzipWriterPool. 245 func (w *GzipResponseWriter) Close() error { 246 if w.ignore { 247 return nil 248 } 249 250 if w.gw == nil { 251 // GZIP not triggered yet, write out regular response. 252 err := w.startPlain() 253 // Returns the error if any at write. 
254 if err != nil { 255 err = fmt.Errorf("gziphandler: write to regular responseWriter at close gets error: %q", err.Error()) 256 } 257 return err 258 } 259 260 err := w.gw.Close() 261 gzipWriterPools[w.index].Put(w.gw) 262 w.gw = nil 263 return err 264 } 265 266 // Flush flushes the underlying *gzip.Writer and then the underlying 267 // http.ResponseWriter if it is an http.Flusher. This makes GzipResponseWriter 268 // an http.Flusher. 269 func (w *GzipResponseWriter) Flush() { 270 if w.gw == nil && !w.ignore { 271 // Only flush once startGzip or startPlain has been called. 272 // 273 // Flush is thus a no-op until we're certain whether a plain 274 // or gzipped response will be served. 275 return 276 } 277 278 if w.gw != nil { 279 w.gw.Flush() 280 } 281 282 if fw, ok := w.ResponseWriter.(http.Flusher); ok { 283 fw.Flush() 284 } 285 } 286 287 // Hijack implements http.Hijacker. If the underlying ResponseWriter is a 288 // Hijacker, its Hijack method is returned. Otherwise an error is returned. 289 func (w *GzipResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) { 290 if hj, ok := w.ResponseWriter.(http.Hijacker); ok { 291 return hj.Hijack() 292 } 293 return nil, nil, fmt.Errorf("http.Hijacker interface is not supported") 294 } 295 296 // verify Hijacker interface implementation 297 var _ http.Hijacker = &GzipResponseWriter{} 298 299 // MustNewGzipLevelHandler behaves just like NewGzipLevelHandler except that in 300 // an error case it panics rather than returning an error. 301 func MustNewGzipLevelHandler(level int) func(http.Handler) http.Handler { 302 wrap, err := NewGzipLevelHandler(level) 303 if err != nil { 304 panic(err) 305 } 306 return wrap 307 } 308 309 // NewGzipLevelHandler returns a wrapper function (often known as middleware) 310 // which can be used to wrap an HTTP handler to transparently gzip the response 311 // body if the client supports it (via the Accept-Encoding header). 
Responses will 312 // be encoded at the given gzip compression level. An error will be returned only 313 // if an invalid gzip compression level is given, so if one can ensure the level 314 // is valid, the returned error can be safely ignored. 315 func NewGzipLevelHandler(level int) (func(http.Handler) http.Handler, error) { 316 return NewGzipLevelAndMinSize(level, DefaultMinSize) 317 } 318 319 // NewGzipLevelAndMinSize behave as NewGzipLevelHandler except it let the caller 320 // specify the minimum size before compression. 321 func NewGzipLevelAndMinSize(level, minSize int) (func(http.Handler) http.Handler, error) { 322 return GzipHandlerWithOpts(CompressionLevel(level), MinSize(minSize)) 323 } 324 325 // GzipHandlerWithOpts creates a middleware that wraps http.Handler with GzipHandler, configured with provided options. 326 // 327 //nolint:revive 328 func GzipHandlerWithOpts(opts ...Option) (func(http.Handler) http.Handler, error) { 329 c := &config{ 330 level: gzip.DefaultCompression, 331 minSize: DefaultMinSize, 332 } 333 334 for _, o := range opts { 335 o(c) 336 } 337 338 if err := c.validate(); err != nil { 339 return nil, err 340 } 341 342 return func(h http.Handler) http.Handler { 343 index := poolIndex(c.level) 344 345 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 346 w.Header().Add(vary, acceptEncoding) 347 if acceptsGzip, rejectsIdentity := requestAcceptance(r); acceptsGzip { 348 gw := &GzipResponseWriter{ 349 ResponseWriter: w, 350 index: index, 351 minSize: c.minSize, 352 contentTypes: c.contentTypes, 353 rejectsIdentity: rejectsIdentity, 354 } 355 defer gw.Close() 356 357 h.ServeHTTP(gw, r) 358 359 } else { 360 h.ServeHTTP(w, r) 361 } 362 }) 363 }, nil 364 } 365 366 // Parsed representation of one of the inputs to ContentTypes. 
// See https://golang.org/pkg/mime/#ParseMediaType
type parsedContentType struct {
	mediaType string
	params    map[string]string
}

// equals reports whether the given media type and parameters match pct.
//
// The media types must be identical. When pct carries no parameters, the
// other side's parameters are not considered; otherwise both parameter sets
// must contain exactly the same keys and values.
func (pct parsedContentType) equals(mediaType string, params map[string]string) bool {
	switch {
	case pct.mediaType != mediaType:
		return false
	case len(pct.params) == 0:
		// Nothing required of the other side's parameters.
		return true
	case len(pct.params) != len(params):
		return false
	}

	for name, want := range pct.params {
		got, present := params[name]
		if !present || got != want {
			return false
		}
	}
	return true
}

// config collects the settings that Option values modify.
type config struct {
	minSize      int
	level        int
	contentTypes []parsedContentType
}

// validate returns an error when level is neither gzip.DefaultCompression nor
// within gzip.BestSpeed..gzip.BestCompression, or when minSize is negative.
func (c *config) validate() error {
	levelOK := c.level == gzip.DefaultCompression ||
		(c.level >= gzip.BestSpeed && c.level <= gzip.BestCompression)
	if !levelOK {
		return fmt.Errorf("invalid compression level requested: %d", c.level)
	}

	if c.minSize < 0 {
		return fmt.Errorf("minimum size must be more than zero")
	}

	return nil
}

// Option mutates a config; it is the functional-option type of this package.
type Option func(c *config)

// MinSize returns an Option that sets the configured minimum size to size.
func MinSize(size int) Option {
	return func(cfg *config) {
		cfg.minSize = size
	}
}

// CompressionLevel returns an Option that sets the configured gzip
// compression level to level.
func CompressionLevel(level int) Option {
	return func(cfg *config) {
		cfg.level = level
	}
}

// ContentTypes specifies a list of content types to compare
// the Content-Type header to before compressing. If none
// match, the response will be returned as-is.
//
// Content types are compared in a case-insensitive, whitespace-ignored
// manner.
//
// A MIME type without any other directive will match a content type
// that has the same MIME type, regardless of that content type's other
// directives.
I.e., "text/html" will match both "text/html" and 438 // "text/html; charset=utf-8". 439 // 440 // A MIME type with any other directive will only match a content type 441 // that has the same MIME type and other directives. I.e., 442 // "text/html; charset=utf-8" will only match "text/html; charset=utf-8". 443 // 444 // By default, responses are gzipped regardless of 445 // Content-Type. 446 func ContentTypes(types []string) Option { 447 return func(c *config) { 448 c.contentTypes = []parsedContentType{} 449 for _, v := range types { 450 mediaType, params, err := mime.ParseMediaType(v) 451 if err == nil { 452 c.contentTypes = append(c.contentTypes, parsedContentType{mediaType, params}) 453 } 454 } 455 } 456 } 457 458 // GzipHandler wraps an HTTP handler, to transparently gzip the response body if 459 // the client supports it (via the Accept-Encoding header). This will compress at 460 // the default compression level. 461 func GzipHandler(h http.Handler) http.Handler { 462 wrapper, _ := NewGzipLevelHandler(gzip.DefaultCompression) 463 return wrapper(h) 464 } 465 466 // requestAcceptance checks whether a given HTTP request indicates that it will 467 // accept a gzipped response and whether it's going to reject an non-encoded response. 468 // 469 // acceptsGzip is true if the given HTTP request indicates that it will 470 // accept a gzipped response and/or an identity request. 471 // rejectsIdentity is false if the given HTTP request didn't explicitly exclude identity encoding. 472 // I.e., either "identity;q=0" or "*;q=0" without a more specific entry for "identity". 
473 // See https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.4 474 func requestAcceptance(r *http.Request) (acceptsGzip bool, rejectsIdentity bool) { 475 acceptedEncodings, _ := parseEncodings(r.Header.Get(acceptEncoding)) 476 477 identity, iset := acceptedEncodings["identity"] 478 wildcard, wset := acceptedEncodings["*"] 479 rejectsIdentity = (iset && identity == 0) || (!iset && wset && wildcard == 0) 480 481 gzip, gzset := acceptedEncodings["gzip"] 482 acceptsGzip = gzip > 0 || (!gzset && wildcard > 0) 483 484 return acceptsGzip, rejectsIdentity 485 } 486 487 // returns true if we've been configured to compress the specific content type. 488 func handleContentType(contentTypes []parsedContentType, ct string) bool { 489 // If unknown, then handle by default. 490 if ct == "" { 491 return true 492 } 493 494 // If contentTypes is empty we handle all content types. 495 if len(contentTypes) == 0 { 496 return true 497 } 498 499 mediaType, params, err := mime.ParseMediaType(ct) 500 if err != nil { 501 return false 502 } 503 504 for _, c := range contentTypes { 505 if c.equals(mediaType, params) { 506 return true 507 } 508 } 509 510 return false 511 } 512 513 // parseEncodings attempts to parse a list of codings, per RFC 2616, as might 514 // appear in an Accept-Encoding header. It returns a map of content-codings to 515 // quality values, and an error containing the errors encountered. It's probably 516 // safe to ignore those, because silently ignoring errors is how the internet 517 // works. 518 // 519 // See: http://tools.ietf.org/html/rfc2616#section-14.3. 
520 func parseEncodings(s string) (codings, error) { 521 c := make(codings) 522 var e []string 523 var ss string 524 var found bool 525 526 for { 527 ss, s, found = strings.Cut(s, ",") 528 coding, qvalue, err := parseCoding(ss) 529 530 if err != nil { 531 e = append(e, err.Error()) 532 } else { 533 c[coding] = qvalue 534 } 535 536 if !found { 537 break 538 } 539 } 540 541 // TODO (adammck): Use a proper multi-error struct, so the individual errors 542 // can be extracted if anyone cares. 543 if len(e) > 0 { 544 return c, fmt.Errorf("errors while parsing encodings: %s", strings.Join(e, ", ")) 545 } 546 547 return c, nil 548 } 549 550 // parseCoding parses a single conding (content-coding with an optional qvalue), 551 // as might appear in an Accept-Encoding header. It attempts to forgive minor 552 // formatting errors. 553 func parseCoding(s string) (coding string, qvalue float64, err error) { 554 for n, part := range strings.Split(s, ";") { 555 part = strings.TrimSpace(part) 556 qvalue = DefaultQValue 557 558 if n == 0 { 559 coding = strings.ToLower(part) 560 } else if strings.HasPrefix(part, "q=") { 561 qvalue, err = strconv.ParseFloat(strings.TrimPrefix(part, "q="), 64) 562 563 if qvalue < 0.0 { 564 qvalue = 0.0 565 } else if qvalue > 1.0 { 566 qvalue = 1.0 567 } 568 } 569 } 570 571 if coding == "" { 572 err = fmt.Errorf("empty content-coding") 573 } 574 575 return 576 }