golang.org/x/net@v0.25.1-0.20240516223405-c87a5b62e243/http2/hpack/hpack.go (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package hpack implements HPACK, a compression format for
     6  // efficiently representing HTTP header fields in the context of HTTP/2.
     7  //
     8  // See http://tools.ietf.org/html/draft-ietf-httpbis-header-compression-09
     9  package hpack
    10  
    11  import (
    12  	"bytes"
    13  	"errors"
    14  	"fmt"
    15  )
    16  
    17  // A DecodingError is something the spec defines as a decoding error.
    18  type DecodingError struct {
    19  	Err error
    20  }
    21  
    22  func (de DecodingError) Error() string {
    23  	return fmt.Sprintf("decoding error: %v", de.Err)
    24  }
    25  
    26  // An InvalidIndexError is returned when an encoder references a table
    27  // entry before the static table or after the end of the dynamic table.
    28  type InvalidIndexError int
    29  
    30  func (e InvalidIndexError) Error() string {
    31  	return fmt.Sprintf("invalid indexed representation index %d", int(e))
    32  }
    33  
    34  // A HeaderField is a name-value pair. Both the name and value are
    35  // treated as opaque sequences of octets.
    36  type HeaderField struct {
    37  	Name, Value string
    38  
    39  	// Sensitive means that this header field should never be
    40  	// indexed.
    41  	Sensitive bool
    42  }
    43  
    44  // IsPseudo reports whether the header field is an http2 pseudo header.
    45  // That is, it reports whether it starts with a colon.
    46  // It is not otherwise guaranteed to be a valid pseudo header field,
    47  // though.
    48  func (hf HeaderField) IsPseudo() bool {
    49  	return len(hf.Name) != 0 && hf.Name[0] == ':'
    50  }
    51  
    52  func (hf HeaderField) String() string {
    53  	var suffix string
    54  	if hf.Sensitive {
    55  		suffix = " (sensitive)"
    56  	}
    57  	return fmt.Sprintf("header field %q = %q%s", hf.Name, hf.Value, suffix)
    58  }
    59  
    60  // Size returns the size of an entry per RFC 7541 section 4.1.
    61  func (hf HeaderField) Size() uint32 {
    62  	// https://httpwg.org/specs/rfc7541.html#rfc.section.4.1
    63  	// "The size of the dynamic table is the sum of the size of
    64  	// its entries. The size of an entry is the sum of its name's
    65  	// length in octets (as defined in Section 5.2), its value's
    66  	// length in octets (see Section 5.2), plus 32.  The size of
    67  	// an entry is calculated using the length of the name and
    68  	// value without any Huffman encoding applied."
    69  
    70  	// This can overflow if somebody makes a large HeaderField
    71  	// Name and/or Value by hand, but we don't care, because that
    72  	// won't happen on the wire because the encoding doesn't allow
    73  	// it.
    74  	return uint32(len(hf.Name) + len(hf.Value) + 32)
    75  }
    76  
    77  // A Decoder is the decoding context for incremental processing of
    78  // header blocks.
    79  type Decoder struct {
    80  	dynTab dynamicTable
    81  	emit   func(f HeaderField)
    82  
    83  	emitEnabled bool // whether calls to emit are enabled
    84  	maxStrLen   int  // 0 means unlimited
    85  
    86  	// buf is the unparsed buffer. It's only written to
    87  	// saveBuf if it was truncated in the middle of a header
    88  	// block. Because it's usually not owned, we can only
    89  	// process it under Write.
    90  	buf []byte // not owned; only valid during Write
    91  
    92  	// saveBuf is previous data passed to Write which we weren't able
    93  	// to fully parse before. Unlike buf, we own this data.
    94  	saveBuf bytes.Buffer
    95  
    96  	firstField bool // processing the first field of the header block
    97  }
    98  
    99  // NewDecoder returns a new decoder with the provided maximum dynamic
   100  // table size. The emitFunc will be called for each valid field
   101  // parsed, in the same goroutine as calls to Write, before Write returns.
   102  func NewDecoder(maxDynamicTableSize uint32, emitFunc func(f HeaderField)) *Decoder {
   103  	d := &Decoder{
   104  		emit:        emitFunc,
   105  		emitEnabled: true,
   106  		firstField:  true,
   107  	}
   108  	d.dynTab.table.init()
   109  	d.dynTab.allowedMaxSize = maxDynamicTableSize
   110  	d.dynTab.setMaxSize(maxDynamicTableSize)
   111  	return d
   112  }
   113  
   114  // ErrStringLength is returned by Decoder.Write when the max string length
   115  // (as configured by Decoder.SetMaxStringLength) would be violated.
   116  var ErrStringLength = errors.New("hpack: string too long")
   117  
   118  // SetMaxStringLength sets the maximum size of a HeaderField name or
   119  // value string. If a string exceeds this length (even after any
   120  // decompression), Write will return ErrStringLength.
   121  // A value of 0 means unlimited and is the default from NewDecoder.
   122  func (d *Decoder) SetMaxStringLength(n int) {
   123  	d.maxStrLen = n
   124  }
   125  
   126  // SetEmitFunc changes the callback used when new header fields
   127  // are decoded.
   128  // It must be non-nil. It does not affect EmitEnabled.
   129  func (d *Decoder) SetEmitFunc(emitFunc func(f HeaderField)) {
   130  	d.emit = emitFunc
   131  }
   132  
   133  // SetEmitEnabled controls whether the emitFunc provided to NewDecoder
   134  // should be called. The default is true.
   135  //
   136  // This facility exists to let servers enforce MAX_HEADER_LIST_SIZE
   137  // while still decoding and keeping in-sync with decoder state, but
   138  // without doing unnecessary decompression or generating unnecessary
   139  // garbage for header fields past the limit.
   140  func (d *Decoder) SetEmitEnabled(v bool) { d.emitEnabled = v }
   141  
   142  // EmitEnabled reports whether calls to the emitFunc provided to NewDecoder
   143  // are currently enabled. The default is true.
   144  func (d *Decoder) EmitEnabled() bool { return d.emitEnabled }
   145  
   146  // TODO: add method *Decoder.Reset(maxSize, emitFunc) to let callers re-use Decoders and their
   147  // underlying buffers for garbage reasons.
   148  
   149  func (d *Decoder) SetMaxDynamicTableSize(v uint32) {
   150  	d.dynTab.setMaxSize(v)
   151  }
   152  
   153  // SetAllowedMaxDynamicTableSize sets the upper bound that the encoded
   154  // stream (via dynamic table size updates) may set the maximum size
   155  // to.
   156  func (d *Decoder) SetAllowedMaxDynamicTableSize(v uint32) {
   157  	d.dynTab.allowedMaxSize = v
   158  }
   159  
   160  type dynamicTable struct {
   161  	// https://httpwg.org/specs/rfc7541.html#rfc.section.2.3.2
   162  	table          headerFieldTable
   163  	size           uint32 // in bytes
   164  	maxSize        uint32 // current maxSize
   165  	allowedMaxSize uint32 // maxSize may go up to this, inclusive
   166  }
   167  
   168  func (dt *dynamicTable) setMaxSize(v uint32) {
   169  	dt.maxSize = v
   170  	dt.evict()
   171  }
   172  
   173  func (dt *dynamicTable) add(f HeaderField) {
   174  	dt.table.addEntry(f)
   175  	dt.size += f.Size()
   176  	dt.evict()
   177  }
   178  
   179  // If we're too big, evict old stuff.
   180  func (dt *dynamicTable) evict() {
   181  	var n int
   182  	for dt.size > dt.maxSize && n < dt.table.len() {
   183  		dt.size -= dt.table.ents[n].Size()
   184  		n++
   185  	}
   186  	dt.table.evictOldest(n)
   187  }
   188  
   189  func (d *Decoder) maxTableIndex() int {
   190  	// This should never overflow. RFC 7540 Section 6.5.2 limits the size of
   191  	// the dynamic table to 2^32 bytes, where each entry will occupy more than
   192  	// one byte. Further, the staticTable has a fixed, small length.
   193  	return d.dynTab.table.len() + staticTable.len()
   194  }
   195  
   196  func (d *Decoder) at(i uint64) (hf HeaderField, ok bool) {
   197  	// See Section 2.3.3.
   198  	if i == 0 {
   199  		return
   200  	}
   201  	if i <= uint64(staticTable.len()) {
   202  		return staticTable.ents[i-1], true
   203  	}
   204  	if i > uint64(d.maxTableIndex()) {
   205  		return
   206  	}
   207  	// In the dynamic table, newer entries have lower indices.
   208  	// However, dt.ents[0] is the oldest entry. Hence, dt.ents is
   209  	// the reversed dynamic table.
   210  	dt := d.dynTab.table
   211  	return dt.ents[dt.len()-(int(i)-staticTable.len())], true
   212  }
   213  
   214  // DecodeFull decodes an entire block.
   215  //
   216  // TODO: remove this method and make it incremental later? This is
   217  // easier for debugging now.
   218  func (d *Decoder) DecodeFull(p []byte) ([]HeaderField, error) {
   219  	var hf []HeaderField
   220  	saveFunc := d.emit
   221  	defer func() { d.emit = saveFunc }()
   222  	d.emit = func(f HeaderField) { hf = append(hf, f) }
   223  	if _, err := d.Write(p); err != nil {
   224  		return nil, err
   225  	}
   226  	if err := d.Close(); err != nil {
   227  		return nil, err
   228  	}
   229  	return hf, nil
   230  }
   231  
   232  // Close declares that the decoding is complete and resets the Decoder
   233  // to be reused again for a new header block. If there is any remaining
   234  // data in the decoder's buffer, Close returns an error.
   235  func (d *Decoder) Close() error {
   236  	if d.saveBuf.Len() > 0 {
   237  		d.saveBuf.Reset()
   238  		return DecodingError{errors.New("truncated headers")}
   239  	}
   240  	d.firstField = true
   241  	return nil
   242  }
   243  
   244  func (d *Decoder) Write(p []byte) (n int, err error) {
   245  	if len(p) == 0 {
   246  		// Prevent state machine CPU attacks (making us redo
   247  		// work up to the point of finding out we don't have
   248  		// enough data)
   249  		return
   250  	}
   251  	// Only copy the data if we have to. Optimistically assume
   252  	// that p will contain a complete header block.
   253  	if d.saveBuf.Len() == 0 {
   254  		d.buf = p
   255  	} else {
   256  		d.saveBuf.Write(p)
   257  		d.buf = d.saveBuf.Bytes()
   258  		d.saveBuf.Reset()
   259  	}
   260  
   261  	for len(d.buf) > 0 {
   262  		err = d.parseHeaderFieldRepr()
   263  		if err == errNeedMore {
   264  			// Extra paranoia, making sure saveBuf won't
   265  			// get too large. All the varint and string
   266  			// reading code earlier should already catch
   267  			// overlong things and return ErrStringLength,
   268  			// but keep this as a last resort.
   269  			const varIntOverhead = 8 // conservative
   270  			if d.maxStrLen != 0 && int64(len(d.buf)) > 2*(int64(d.maxStrLen)+varIntOverhead) {
   271  				return 0, ErrStringLength
   272  			}
   273  			d.saveBuf.Write(d.buf)
   274  			return len(p), nil
   275  		}
   276  		d.firstField = false
   277  		if err != nil {
   278  			break
   279  		}
   280  	}
   281  	return len(p), err
   282  }
   283  
   284  // errNeedMore is an internal sentinel error value that means the
   285  // buffer is truncated and we need to read more data before we can
   286  // continue parsing.
   287  var errNeedMore = errors.New("need more data")
   288  
   289  type indexType int
   290  
   291  const (
   292  	indexedTrue indexType = iota
   293  	indexedFalse
   294  	indexedNever
   295  )
   296  
   297  func (v indexType) indexed() bool   { return v == indexedTrue }
   298  func (v indexType) sensitive() bool { return v == indexedNever }
   299  
   300  // returns errNeedMore if there isn't enough data available.
   301  // any other error is fatal.
   302  // consumes d.buf iff it returns nil.
   303  // precondition: must be called with len(d.buf) > 0
   304  func (d *Decoder) parseHeaderFieldRepr() error {
   305  	b := d.buf[0]
   306  	switch {
   307  	case b&128 != 0:
   308  		// Indexed representation.
   309  		// High bit set?
   310  		// https://httpwg.org/specs/rfc7541.html#rfc.section.6.1
   311  		return d.parseFieldIndexed()
   312  	case b&192 == 64:
   313  		// 6.2.1 Literal Header Field with Incremental Indexing
   314  		// 0b10xxxxxx: top two bits are 10
   315  		// https://httpwg.org/specs/rfc7541.html#rfc.section.6.2.1
   316  		return d.parseFieldLiteral(6, indexedTrue)
   317  	case b&240 == 0:
   318  		// 6.2.2 Literal Header Field without Indexing
   319  		// 0b0000xxxx: top four bits are 0000
   320  		// https://httpwg.org/specs/rfc7541.html#rfc.section.6.2.2
   321  		return d.parseFieldLiteral(4, indexedFalse)
   322  	case b&240 == 16:
   323  		// 6.2.3 Literal Header Field never Indexed
   324  		// 0b0001xxxx: top four bits are 0001
   325  		// https://httpwg.org/specs/rfc7541.html#rfc.section.6.2.3
   326  		return d.parseFieldLiteral(4, indexedNever)
   327  	case b&224 == 32:
   328  		// 6.3 Dynamic Table Size Update
   329  		// Top three bits are '001'.
   330  		// https://httpwg.org/specs/rfc7541.html#rfc.section.6.3
   331  		return d.parseDynamicTableSizeUpdate()
   332  	}
   333  
   334  	return DecodingError{errors.New("invalid encoding")}
   335  }
   336  
   337  // (same invariants and behavior as parseHeaderFieldRepr)
   338  func (d *Decoder) parseFieldIndexed() error {
   339  	buf := d.buf
   340  	idx, buf, err := readVarInt(7, buf)
   341  	if err != nil {
   342  		return err
   343  	}
   344  	hf, ok := d.at(idx)
   345  	if !ok {
   346  		return DecodingError{InvalidIndexError(idx)}
   347  	}
   348  	d.buf = buf
   349  	return d.callEmit(HeaderField{Name: hf.Name, Value: hf.Value})
   350  }
   351  
   352  // (same invariants and behavior as parseHeaderFieldRepr)
   353  func (d *Decoder) parseFieldLiteral(n uint8, it indexType) error {
   354  	buf := d.buf
   355  	nameIdx, buf, err := readVarInt(n, buf)
   356  	if err != nil {
   357  		return err
   358  	}
   359  
   360  	var hf HeaderField
   361  	wantStr := d.emitEnabled || it.indexed()
   362  	var undecodedName undecodedString
   363  	if nameIdx > 0 {
   364  		ihf, ok := d.at(nameIdx)
   365  		if !ok {
   366  			return DecodingError{InvalidIndexError(nameIdx)}
   367  		}
   368  		hf.Name = ihf.Name
   369  	} else {
   370  		undecodedName, buf, err = d.readString(buf)
   371  		if err != nil {
   372  			return err
   373  		}
   374  	}
   375  	undecodedValue, buf, err := d.readString(buf)
   376  	if err != nil {
   377  		return err
   378  	}
   379  	if wantStr {
   380  		if nameIdx <= 0 {
   381  			hf.Name, err = d.decodeString(undecodedName)
   382  			if err != nil {
   383  				return err
   384  			}
   385  		}
   386  		hf.Value, err = d.decodeString(undecodedValue)
   387  		if err != nil {
   388  			return err
   389  		}
   390  	}
   391  	d.buf = buf
   392  	if it.indexed() {
   393  		d.dynTab.add(hf)
   394  	}
   395  	hf.Sensitive = it.sensitive()
   396  	return d.callEmit(hf)
   397  }
   398  
   399  func (d *Decoder) callEmit(hf HeaderField) error {
   400  	if d.maxStrLen != 0 {
   401  		if len(hf.Name) > d.maxStrLen || len(hf.Value) > d.maxStrLen {
   402  			return ErrStringLength
   403  		}
   404  	}
   405  	if d.emitEnabled {
   406  		d.emit(hf)
   407  	}
   408  	return nil
   409  }
   410  
   411  // (same invariants and behavior as parseHeaderFieldRepr)
   412  func (d *Decoder) parseDynamicTableSizeUpdate() error {
   413  	// RFC 7541, sec 4.2: This dynamic table size update MUST occur at the
   414  	// beginning of the first header block following the change to the dynamic table size.
   415  	if !d.firstField && d.dynTab.size > 0 {
   416  		return DecodingError{errors.New("dynamic table size update MUST occur at the beginning of a header block")}
   417  	}
   418  
   419  	buf := d.buf
   420  	size, buf, err := readVarInt(5, buf)
   421  	if err != nil {
   422  		return err
   423  	}
   424  	if size > uint64(d.dynTab.allowedMaxSize) {
   425  		return DecodingError{errors.New("dynamic table size update too large")}
   426  	}
   427  	d.dynTab.setMaxSize(uint32(size))
   428  	d.buf = buf
   429  	return nil
   430  }
   431  
   432  var errVarintOverflow = DecodingError{errors.New("varint integer overflow")}
   433  
   434  // readVarInt reads an unsigned variable length integer off the
   435  // beginning of p. n is the parameter as described in
   436  // https://httpwg.org/specs/rfc7541.html#rfc.section.5.1.
   437  //
   438  // n must always be between 1 and 8.
   439  //
   440  // The returned remain buffer is either a smaller suffix of p, or err != nil.
   441  // The error is errNeedMore if p doesn't contain a complete integer.
   442  func readVarInt(n byte, p []byte) (i uint64, remain []byte, err error) {
   443  	if n < 1 || n > 8 {
   444  		panic("bad n")
   445  	}
   446  	if len(p) == 0 {
   447  		return 0, p, errNeedMore
   448  	}
   449  	i = uint64(p[0])
   450  	if n < 8 {
   451  		i &= (1 << uint64(n)) - 1
   452  	}
   453  	if i < (1<<uint64(n))-1 {
   454  		return i, p[1:], nil
   455  	}
   456  
   457  	origP := p
   458  	p = p[1:]
   459  	var m uint64
   460  	for len(p) > 0 {
   461  		b := p[0]
   462  		p = p[1:]
   463  		i += uint64(b&127) << m
   464  		if b&128 == 0 {
   465  			return i, p, nil
   466  		}
   467  		m += 7
   468  		if m >= 63 { // TODO: proper overflow check. making this up.
   469  			return 0, origP, errVarintOverflow
   470  		}
   471  	}
   472  	return 0, origP, errNeedMore
   473  }
   474  
   475  // readString reads an hpack string from p.
   476  //
   477  // It returns a reference to the encoded string data to permit deferring decode costs
   478  // until after the caller verifies all data is present.
   479  func (d *Decoder) readString(p []byte) (u undecodedString, remain []byte, err error) {
   480  	if len(p) == 0 {
   481  		return u, p, errNeedMore
   482  	}
   483  	isHuff := p[0]&128 != 0
   484  	strLen, p, err := readVarInt(7, p)
   485  	if err != nil {
   486  		return u, p, err
   487  	}
   488  	if d.maxStrLen != 0 && strLen > uint64(d.maxStrLen) {
   489  		// Returning an error here means Huffman decoding errors
   490  		// for non-indexed strings past the maximum string length
   491  		// are ignored, but the server is returning an error anyway
   492  		// and because the string is not indexed the error will not
   493  		// affect the decoding state.
   494  		return u, nil, ErrStringLength
   495  	}
   496  	if uint64(len(p)) < strLen {
   497  		return u, p, errNeedMore
   498  	}
   499  	u.isHuff = isHuff
   500  	u.b = p[:strLen]
   501  	return u, p[strLen:], nil
   502  }
   503  
   504  type undecodedString struct {
   505  	isHuff bool
   506  	b      []byte
   507  }
   508  
   509  func (d *Decoder) decodeString(u undecodedString) (string, error) {
   510  	if !u.isHuff {
   511  		return string(u.b), nil
   512  	}
   513  	buf := bufPool.Get().(*bytes.Buffer)
   514  	buf.Reset() // don't trust others
   515  	var s string
   516  	err := huffmanDecode(buf, d.maxStrLen, u.b)
   517  	if err == nil {
   518  		s = buf.String()
   519  	}
   520  	buf.Reset() // be nice to GC
   521  	bufPool.Put(buf)
   522  	return s, err
   523  }