github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/tcpip/header/tcp.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package header
    16  
    17  import (
    18  	"encoding/binary"
    19  
    20  	"github.com/google/btree"
    21  	"github.com/metacubex/gvisor/pkg/tcpip"
    22  	"github.com/metacubex/gvisor/pkg/tcpip/checksum"
    23  	"github.com/metacubex/gvisor/pkg/tcpip/seqnum"
    24  )
    25  
// These constants are the byte offsets of the respective fields within the
// TCP header. The accessor and setter methods on TCP index the header slice
// with them.
const (
	TCPSrcPortOffset   = 0
	TCPDstPortOffset   = 2
	TCPSeqNumOffset    = 4
	TCPAckNumOffset    = 8
	TCPDataOffset      = 12
	TCPFlagsOffset     = 13
	TCPWinSizeOffset   = 14
	TCPChecksumOffset  = 16
	TCPUrgentPtrOffset = 18
)
    38  
const (
	// MaxWndScale is the maximum allowed window scaling, as described in
	// RFC 1323, section 2.3, page 11. ParseSynOptions caps any larger
	// received window scale value at this limit.
	MaxWndScale = 14

	// TCPMaxSACKBlocks is the maximum number of SACK blocks that can
	// be encoded in a TCP option field. EncodeSACKBlocks never writes more
	// blocks than this.
	TCPMaxSACKBlocks = 4
)
    48  
    49  // TCPFlags is the dedicated type for TCP flags.
    50  type TCPFlags uint8
    51  
    52  // Intersects returns true iff there are flags common to both f and o.
    53  func (f TCPFlags) Intersects(o TCPFlags) bool {
    54  	return f&o != 0
    55  }
    56  
    57  // Contains returns true iff all the flags in o are contained within f.
    58  func (f TCPFlags) Contains(o TCPFlags) bool {
    59  	return f&o == o
    60  }
    61  
    62  // String implements Stringer.String.
    63  func (f TCPFlags) String() string {
    64  	flagsStr := []byte("FSRPAUEC")
    65  	for i := range flagsStr {
    66  		if f&(1<<uint(i)) == 0 {
    67  			flagsStr[i] = ' '
    68  		}
    69  	}
    70  	return string(flagsStr)
    71  }
    72  
// Flags that may be set in a TCP segment. Each flag occupies a single bit of
// the flag byte, from TCPFlagFin in the least significant bit up to TCPFlagCwr
// in the most significant bit. The order of the declarations below defines
// the bit values and must not be changed.
const (
	TCPFlagFin TCPFlags = 1 << iota
	TCPFlagSyn
	TCPFlagRst
	TCPFlagPsh
	TCPFlagAck
	TCPFlagUrg
	TCPFlagEce
	TCPFlagCwr
)
    84  
// Options that may be present in a TCP segment. Each value is the option
// kind, i.e. the first byte of the encoded option, which the option parsers
// in this file switch on.
const (
	TCPOptionEOL           = 0
	TCPOptionNOP           = 1
	TCPOptionMSS           = 2
	TCPOptionWS            = 3
	TCPOptionTS            = 8
	TCPOptionSACKPermitted = 4
	TCPOptionSACK          = 5
)
    95  
// Option lengths. Each value is the total encoded size in bytes of the
// corresponding fixed-size option, including its kind and length bytes; it is
// also the value the encoders write into the option's length byte.
const (
	TCPOptionMSSLength           = 4
	TCPOptionTSLength            = 10
	TCPOptionWSLength            = 3
	TCPOptionSackPermittedLength = 2
)
   103  
// TCPFields contains the fields of a TCP packet. It is used to describe the
// fields of a packet that needs to be encoded, and is the input to
// TCP.Encode.
type TCPFields struct {
	// SrcPort is the "source port" field of a TCP packet.
	SrcPort uint16

	// DstPort is the "destination port" field of a TCP packet.
	DstPort uint16

	// SeqNum is the "sequence number" field of a TCP packet.
	SeqNum uint32

	// AckNum is the "acknowledgement number" field of a TCP packet.
	AckNum uint32

	// DataOffset is the "data offset" field of a TCP packet. It is the length of
	// the TCP header in bytes. The header stores it as a count of 4-byte words,
	// so any remainder modulo 4 is dropped when it is encoded.
	DataOffset uint8

	// Flags is the "flags" field of a TCP packet.
	Flags TCPFlags

	// WindowSize is the "window size" field of a TCP packet.
	WindowSize uint16

	// Checksum is the "checksum" field of a TCP packet.
	Checksum uint16

	// UrgentPointer is the "urgent pointer" field of a TCP packet.
	UrgentPointer uint16
}
   135  
// TCPSynOptions is used to return the parsed TCP Options in a syn
// segment. It is the result type of ParseSynOptions.
//
// +stateify savable
type TCPSynOptions struct {
	// MSS is the maximum segment size provided by the peer in the SYN.
	//
	// ParseSynOptions sets it to TCPDefaultMSS when no MSS option is found
	// and raises received values below TCPMinimumSendMSS to that minimum.
	MSS uint16

	// WS is the window scale option provided by the peer in the SYN.
	//
	// Set to -1 if no window scale option was provided. ParseSynOptions caps
	// received values at MaxWndScale.
	WS int

	// TS is true if the timestamp option was provided in the syn/syn-ack.
	TS bool

	// TSVal is the value of the TSVal field in the timestamp option.
	TSVal uint32

	// TSEcr is the value of the TSEcr field in the timestamp option.
	// ParseSynOptions only fills it in when parsing a SYN-ACK.
	TSEcr uint32

	// SACKPermitted is true if the SACK option was provided in the SYN/SYN-ACK.
	SACKPermitted bool

	// Flags if specified are set on the outgoing SYN. The SYN flag is
	// always set.
	Flags TCPFlags
}
   165  
// SACKBlock represents a single contiguous SACK block, covering the sequence
// numbers from Start up to but not including End.
//
// +stateify savable
type SACKBlock struct {
	// Start indicates the lowest sequence number in the block.
	Start seqnum.Value

	// End indicates the sequence number immediately following the last
	// sequence number of this block.
	End seqnum.Value
}
   177  
   178  // Less returns true if r.Start < b.Start.
   179  func (r SACKBlock) Less(b btree.Item) bool {
   180  	return r.Start.LessThan(b.(SACKBlock).Start)
   181  }
   182  
   183  // Contains returns true if b is completely contained in r.
   184  func (r SACKBlock) Contains(b SACKBlock) bool {
   185  	return r.Start.LessThanEq(b.Start) && b.End.LessThanEq(r.End)
   186  }
   187  
// TCPOptions are used to parse and cache the TCP segment options for a non
// syn/syn-ack segment. It is the result type of ParseTCPOptions.
//
// +stateify savable
type TCPOptions struct {
	// TS is true if the TimeStamp option is enabled.
	TS bool

	// TSVal is the value in the TSVal field of the segment.
	TSVal uint32

	// TSEcr is the value in the TSEcr field of the segment.
	TSEcr uint32

	// SACKBlocks are the SACK blocks specified in the segment.
	// ParseTCPOptions leaves this nil when the segment carries no SACK option.
	SACKBlocks []SACKBlock
}
   205  
// TCP represents a TCP header stored in a byte slice. Its accessor and setter
// methods index the slice directly without checking its length, so they panic
// if the slice is too short to hold the field being accessed.
type TCP []byte
   208  
const (
	// TCPMinimumSize is the minimum size of a valid TCP packet, i.e. the size
	// of a TCP header without options.
	TCPMinimumSize = 20

	// TCPOptionsMaximumSize is the maximum size of TCP options.
	TCPOptionsMaximumSize = 40

	// TCPHeaderMaximumSize is the maximum header size of a TCP packet.
	TCPHeaderMaximumSize = TCPMinimumSize + TCPOptionsMaximumSize

	// TCPTotalHeaderMaximumSize is the maximum size of headers from all layers in
	// a TCP packet. It is analogous to MAX_TCP_HEADER in Linux.
	//
	// TODO(b/319936470): Investigate why this needs to be at least 140 bytes. In
	// Linux this value is at least 160, but in theory we should be able to use
	// 138. In practice anything less than 140 starts to break GSO on gVNIC
	// hardware.
	TCPTotalHeaderMaximumSize = 160

	// TCPProtocolNumber is TCP's transport protocol number.
	TCPProtocolNumber tcpip.TransportProtocolNumber = 6

	// TCPMinimumMSS is the minimum acceptable value for MSS. This is the
	// same as the value TCP_MIN_MSS defined net/tcp.h.
	TCPMinimumMSS = IPv4MaximumHeaderSize + TCPHeaderMaximumSize + MinIPFragmentPayloadSize - IPv4MinimumSize - TCPMinimumSize

	// TCPMinimumSendMSS is the minimum value for MSS in a sender. This is the
	// same as the value TCP_MIN_SND_MSS in net/tcp.h.
	TCPMinimumSendMSS = TCPOptionsMaximumSize + MinIPFragmentPayloadSize

	// TCPMaximumMSS is the maximum acceptable value for MSS.
	TCPMaximumMSS = 0xffff

	// TCPDefaultMSS is the MSS value that should be used if an MSS option
	// is not received from the peer. It's also the value returned by
	// TCP_MAXSEG option for a socket in an unconnected state.
	//
	// Per RFC 1122, page 85: "If an MSS option is not received at
	// connection setup, TCP MUST assume a default send MSS of 536."
	TCPDefaultMSS = 536
)
   250  
   251  // SourcePort returns the "source port" field of the TCP header.
   252  func (b TCP) SourcePort() uint16 {
   253  	return binary.BigEndian.Uint16(b[TCPSrcPortOffset:])
   254  }
   255  
   256  // DestinationPort returns the "destination port" field of the TCP header.
   257  func (b TCP) DestinationPort() uint16 {
   258  	return binary.BigEndian.Uint16(b[TCPDstPortOffset:])
   259  }
   260  
   261  // SequenceNumber returns the "sequence number" field of the TCP header.
   262  func (b TCP) SequenceNumber() uint32 {
   263  	return binary.BigEndian.Uint32(b[TCPSeqNumOffset:])
   264  }
   265  
   266  // AckNumber returns the "ack number" field of the TCP header.
   267  func (b TCP) AckNumber() uint32 {
   268  	return binary.BigEndian.Uint32(b[TCPAckNumOffset:])
   269  }
   270  
   271  // DataOffset returns the "data offset" field of the TCP header. The return
   272  // value is the length of the TCP header in bytes.
   273  func (b TCP) DataOffset() uint8 {
   274  	return (b[TCPDataOffset] >> 4) * 4
   275  }
   276  
   277  // Payload returns the data in the TCP packet.
   278  func (b TCP) Payload() []byte {
   279  	return b[b.DataOffset():]
   280  }
   281  
   282  // Flags returns the flags field of the TCP header.
   283  func (b TCP) Flags() TCPFlags {
   284  	return TCPFlags(b[TCPFlagsOffset])
   285  }
   286  
   287  // WindowSize returns the "window size" field of the TCP header.
   288  func (b TCP) WindowSize() uint16 {
   289  	return binary.BigEndian.Uint16(b[TCPWinSizeOffset:])
   290  }
   291  
   292  // Checksum returns the "checksum" field of the TCP header.
   293  func (b TCP) Checksum() uint16 {
   294  	return binary.BigEndian.Uint16(b[TCPChecksumOffset:])
   295  }
   296  
   297  // UrgentPointer returns the "urgent pointer" field of the TCP header.
   298  func (b TCP) UrgentPointer() uint16 {
   299  	return binary.BigEndian.Uint16(b[TCPUrgentPtrOffset:])
   300  }
   301  
   302  // SetSourcePort sets the "source port" field of the TCP header.
   303  func (b TCP) SetSourcePort(port uint16) {
   304  	binary.BigEndian.PutUint16(b[TCPSrcPortOffset:], port)
   305  }
   306  
   307  // SetDestinationPort sets the "destination port" field of the TCP header.
   308  func (b TCP) SetDestinationPort(port uint16) {
   309  	binary.BigEndian.PutUint16(b[TCPDstPortOffset:], port)
   310  }
   311  
   312  // SetChecksum sets the checksum field of the TCP header.
   313  func (b TCP) SetChecksum(xsum uint16) {
   314  	checksum.Put(b[TCPChecksumOffset:], xsum)
   315  }
   316  
   317  // SetDataOffset sets the data offset field of the TCP header. headerLen should
   318  // be the length of the TCP header in bytes.
   319  func (b TCP) SetDataOffset(headerLen uint8) {
   320  	b[TCPDataOffset] = (headerLen / 4) << 4
   321  }
   322  
   323  // SetSequenceNumber sets the sequence number field of the TCP header.
   324  func (b TCP) SetSequenceNumber(seqNum uint32) {
   325  	binary.BigEndian.PutUint32(b[TCPSeqNumOffset:], seqNum)
   326  }
   327  
   328  // SetAckNumber sets the ack number field of the TCP header.
   329  func (b TCP) SetAckNumber(ackNum uint32) {
   330  	binary.BigEndian.PutUint32(b[TCPAckNumOffset:], ackNum)
   331  }
   332  
// SetFlags sets the flags field of the TCP header. flags is the raw flag
// byte; a TCPFlags value has to be converted to uint8 by the caller.
func (b TCP) SetFlags(flags uint8) {
	b[TCPFlagsOffset] = flags
}
   337  
   338  // SetWindowSize sets the window size field of the TCP header.
   339  func (b TCP) SetWindowSize(rcvwnd uint16) {
   340  	binary.BigEndian.PutUint16(b[TCPWinSizeOffset:], rcvwnd)
   341  }
   342  
   343  // SetUrgentPointer sets the window size field of the TCP header.
   344  func (b TCP) SetUrgentPointer(urgentPointer uint16) {
   345  	binary.BigEndian.PutUint16(b[TCPUrgentPtrOffset:], urgentPointer)
   346  }
   347  
   348  // CalculateChecksum calculates the checksum of the TCP segment.
   349  // partialChecksum is the checksum of the network-layer pseudo-header
   350  // and the checksum of the segment data.
   351  func (b TCP) CalculateChecksum(partialChecksum uint16) uint16 {
   352  	// Calculate the rest of the checksum.
   353  	return checksum.Checksum(b[:b.DataOffset()], partialChecksum)
   354  }
   355  
   356  // IsChecksumValid returns true iff the TCP header's checksum is valid.
   357  func (b TCP) IsChecksumValid(src, dst tcpip.Address, payloadChecksum, payloadLength uint16) bool {
   358  	xsum := PseudoHeaderChecksum(TCPProtocolNumber, src, dst, uint16(b.DataOffset())+payloadLength)
   359  	xsum = checksum.Combine(xsum, payloadChecksum)
   360  	return b.CalculateChecksum(xsum) == 0xffff
   361  }
   362  
   363  // Options returns a slice that holds the unparsed TCP options in the segment.
   364  func (b TCP) Options() []byte {
   365  	return b[TCPMinimumSize:b.DataOffset()]
   366  }
   367  
   368  // ParsedOptions returns a TCPOptions structure which parses and caches the TCP
   369  // option values in the TCP segment. NOTE: Invoking this function repeatedly is
   370  // expensive as it reparses the options on each invocation.
   371  func (b TCP) ParsedOptions() TCPOptions {
   372  	return ParseTCPOptions(b.Options())
   373  }
   374  
   375  func (b TCP) encodeSubset(seq, ack uint32, flags TCPFlags, rcvwnd uint16) {
   376  	binary.BigEndian.PutUint32(b[TCPSeqNumOffset:], seq)
   377  	binary.BigEndian.PutUint32(b[TCPAckNumOffset:], ack)
   378  	b[TCPFlagsOffset] = uint8(flags)
   379  	binary.BigEndian.PutUint16(b[TCPWinSizeOffset:], rcvwnd)
   380  }
   381  
   382  // Encode encodes all the fields of the TCP header.
   383  func (b TCP) Encode(t *TCPFields) {
   384  	b.encodeSubset(t.SeqNum, t.AckNum, t.Flags, t.WindowSize)
   385  	b.SetSourcePort(t.SrcPort)
   386  	b.SetDestinationPort(t.DstPort)
   387  	b.SetDataOffset(t.DataOffset)
   388  	b.SetChecksum(t.Checksum)
   389  	b.SetUrgentPointer(t.UrgentPointer)
   390  }
   391  
   392  // EncodePartial updates a subset of the fields of the TCP header. It is useful
   393  // in cases when similar segments are produced.
   394  func (b TCP) EncodePartial(partialChecksum, length uint16, seqnum, acknum uint32, flags TCPFlags, rcvwnd uint16) {
   395  	// Add the total length and "flags" field contributions to the checksum.
   396  	// We don't use the flags field directly from the header because it's a
   397  	// one-byte field with an odd offset, so it would be accounted for
   398  	// incorrectly by the Checksum routine.
   399  	tmp := make([]byte, 4)
   400  	binary.BigEndian.PutUint16(tmp, length)
   401  	binary.BigEndian.PutUint16(tmp[2:], uint16(flags))
   402  	xsum := checksum.Checksum(tmp, partialChecksum)
   403  
   404  	// Encode the passed-in fields.
   405  	b.encodeSubset(seqnum, acknum, flags, rcvwnd)
   406  
   407  	// Add the contributions of the passed-in fields to the checksum.
   408  	xsum = checksum.Checksum(b[TCPSeqNumOffset:TCPSeqNumOffset+8], xsum)
   409  	xsum = checksum.Checksum(b[TCPWinSizeOffset:TCPWinSizeOffset+2], xsum)
   410  
   411  	// Encode the checksum.
   412  	b.SetChecksum(^xsum)
   413  }
   414  
   415  // SetSourcePortWithChecksumUpdate implements ChecksummableTransport.
   416  func (b TCP) SetSourcePortWithChecksumUpdate(new uint16) {
   417  	old := b.SourcePort()
   418  	b.SetSourcePort(new)
   419  	b.SetChecksum(^checksumUpdate2ByteAlignedUint16(^b.Checksum(), old, new))
   420  }
   421  
   422  // SetDestinationPortWithChecksumUpdate implements ChecksummableTransport.
   423  func (b TCP) SetDestinationPortWithChecksumUpdate(new uint16) {
   424  	old := b.DestinationPort()
   425  	b.SetDestinationPort(new)
   426  	b.SetChecksum(^checksumUpdate2ByteAlignedUint16(^b.Checksum(), old, new))
   427  }
   428  
   429  // UpdateChecksumPseudoHeaderAddress implements ChecksummableTransport.
   430  func (b TCP) UpdateChecksumPseudoHeaderAddress(old, new tcpip.Address, fullChecksum bool) {
   431  	xsum := b.Checksum()
   432  	if fullChecksum {
   433  		xsum = ^xsum
   434  	}
   435  
   436  	xsum = checksumUpdate2ByteAlignedAddress(xsum, old, new)
   437  	if fullChecksum {
   438  		xsum = ^xsum
   439  	}
   440  
   441  	b.SetChecksum(xsum)
   442  }
   443  
   444  // ParseSynOptions parses the options received in a SYN segment and returns the
   445  // relevant ones. opts should point to the option part of the TCP header.
   446  func ParseSynOptions(opts []byte, isAck bool) TCPSynOptions {
   447  	limit := len(opts)
   448  
   449  	synOpts := TCPSynOptions{
   450  		// Per RFC 1122, page 85: "If an MSS option is not received at
   451  		// connection setup, TCP MUST assume a default send MSS of 536."
   452  		MSS: TCPDefaultMSS,
   453  		// If no window scale option is specified, WS in options is
   454  		// returned as -1; this is because the absence of the option
   455  		// indicates that the we cannot use window scaling on the
   456  		// receive end either.
   457  		WS: -1,
   458  	}
   459  
   460  	for i := 0; i < limit; {
   461  		switch opts[i] {
   462  		case TCPOptionEOL:
   463  			i = limit
   464  		case TCPOptionNOP:
   465  			i++
   466  		case TCPOptionMSS:
   467  			if i+4 > limit || opts[i+1] != 4 {
   468  				return synOpts
   469  			}
   470  			mss := uint16(opts[i+2])<<8 | uint16(opts[i+3])
   471  			if mss == 0 {
   472  				return synOpts
   473  			}
   474  			synOpts.MSS = mss
   475  			if mss < TCPMinimumSendMSS {
   476  				synOpts.MSS = TCPMinimumSendMSS
   477  			}
   478  			i += 4
   479  
   480  		case TCPOptionWS:
   481  			if i+3 > limit || opts[i+1] != 3 {
   482  				return synOpts
   483  			}
   484  			ws := int(opts[i+2])
   485  			if ws > MaxWndScale {
   486  				ws = MaxWndScale
   487  			}
   488  			synOpts.WS = ws
   489  			i += 3
   490  
   491  		case TCPOptionTS:
   492  			if i+10 > limit || opts[i+1] != 10 {
   493  				return synOpts
   494  			}
   495  			synOpts.TSVal = binary.BigEndian.Uint32(opts[i+2:])
   496  			if isAck {
   497  				// If the segment is a SYN-ACK then store the Timestamp Echo Reply
   498  				// in the segment.
   499  				synOpts.TSEcr = binary.BigEndian.Uint32(opts[i+6:])
   500  			}
   501  			synOpts.TS = true
   502  			i += 10
   503  		case TCPOptionSACKPermitted:
   504  			if i+2 > limit || opts[i+1] != 2 {
   505  				return synOpts
   506  			}
   507  			synOpts.SACKPermitted = true
   508  			i += 2
   509  
   510  		default:
   511  			// We don't recognize this option, just skip over it.
   512  			if i+2 > limit {
   513  				return synOpts
   514  			}
   515  			l := int(opts[i+1])
   516  			// If the length is incorrect or if l+i overflows the
   517  			// total options length then return false.
   518  			if l < 2 || i+l > limit {
   519  				return synOpts
   520  			}
   521  			i += l
   522  		}
   523  	}
   524  
   525  	return synOpts
   526  }
   527  
   528  // ParseTCPOptions extracts and stores all known options in the provided byte
   529  // slice in a TCPOptions structure.
   530  func ParseTCPOptions(b []byte) TCPOptions {
   531  	opts := TCPOptions{}
   532  	limit := len(b)
   533  	for i := 0; i < limit; {
   534  		switch b[i] {
   535  		case TCPOptionEOL:
   536  			i = limit
   537  		case TCPOptionNOP:
   538  			i++
   539  		case TCPOptionTS:
   540  			if i+10 > limit || (b[i+1] != 10) {
   541  				return opts
   542  			}
   543  			opts.TS = true
   544  			opts.TSVal = binary.BigEndian.Uint32(b[i+2:])
   545  			opts.TSEcr = binary.BigEndian.Uint32(b[i+6:])
   546  			i += 10
   547  		case TCPOptionSACK:
   548  			if i+2 > limit {
   549  				// Malformed SACK block, just return and stop parsing.
   550  				return opts
   551  			}
   552  			sackOptionLen := int(b[i+1])
   553  			if i+sackOptionLen > limit || (sackOptionLen-2)%8 != 0 {
   554  				// Malformed SACK block, just return and stop parsing.
   555  				return opts
   556  			}
   557  			numBlocks := (sackOptionLen - 2) / 8
   558  			opts.SACKBlocks = []SACKBlock{}
   559  			for j := 0; j < numBlocks; j++ {
   560  				start := binary.BigEndian.Uint32(b[i+2+j*8:])
   561  				end := binary.BigEndian.Uint32(b[i+2+j*8+4:])
   562  				opts.SACKBlocks = append(opts.SACKBlocks, SACKBlock{
   563  					Start: seqnum.Value(start),
   564  					End:   seqnum.Value(end),
   565  				})
   566  			}
   567  			i += sackOptionLen
   568  		default:
   569  			// We don't recognize this option, just skip over it.
   570  			if i+2 > limit {
   571  				return opts
   572  			}
   573  			l := int(b[i+1])
   574  			// If the length is incorrect or if l+i overflows the
   575  			// total options length then return false.
   576  			if l < 2 || i+l > limit {
   577  				return opts
   578  			}
   579  			i += l
   580  		}
   581  	}
   582  	return opts
   583  }
   584  
   585  // EncodeMSSOption encodes the MSS TCP option with the provided MSS values in
   586  // the supplied buffer. If the provided buffer is not large enough then it just
   587  // returns without encoding anything. It returns the number of bytes written to
   588  // the provided buffer.
   589  func EncodeMSSOption(mss uint32, b []byte) int {
   590  	if len(b) < TCPOptionMSSLength {
   591  		return 0
   592  	}
   593  	b[0], b[1], b[2], b[3] = TCPOptionMSS, TCPOptionMSSLength, byte(mss>>8), byte(mss)
   594  	return TCPOptionMSSLength
   595  }
   596  
   597  // EncodeWSOption encodes the WS TCP option with the WS value in the
   598  // provided buffer. If the provided buffer is not large enough then it just
   599  // returns without encoding anything. It returns the number of bytes written to
   600  // the provided buffer.
   601  func EncodeWSOption(ws int, b []byte) int {
   602  	if len(b) < TCPOptionWSLength {
   603  		return 0
   604  	}
   605  	b[0], b[1], b[2] = TCPOptionWS, TCPOptionWSLength, uint8(ws)
   606  	return int(b[1])
   607  }
   608  
   609  // EncodeTSOption encodes the provided tsVal and tsEcr values as a TCP timestamp
   610  // option into the provided buffer. If the buffer is smaller than expected it
   611  // just returns without encoding anything. It returns the number of bytes
   612  // written to the provided buffer.
   613  func EncodeTSOption(tsVal, tsEcr uint32, b []byte) int {
   614  	if len(b) < TCPOptionTSLength {
   615  		return 0
   616  	}
   617  	b[0], b[1] = TCPOptionTS, TCPOptionTSLength
   618  	binary.BigEndian.PutUint32(b[2:], tsVal)
   619  	binary.BigEndian.PutUint32(b[6:], tsEcr)
   620  	return int(b[1])
   621  }
   622  
   623  // EncodeSACKPermittedOption encodes a SACKPermitted option into the provided
   624  // buffer. If the buffer is smaller than required it just returns without
   625  // encoding anything. It returns the number of bytes written to the provided
   626  // buffer.
   627  func EncodeSACKPermittedOption(b []byte) int {
   628  	if len(b) < TCPOptionSackPermittedLength {
   629  		return 0
   630  	}
   631  
   632  	b[0], b[1] = TCPOptionSACKPermitted, TCPOptionSackPermittedLength
   633  	return int(b[1])
   634  }
   635  
   636  // EncodeSACKBlocks encodes the provided SACK blocks as a TCP SACK option block
   637  // in the provided slice. It tries to fit in as many blocks as possible based on
   638  // number of bytes available in the provided buffer. It returns the number of
   639  // bytes written to the provided buffer.
   640  func EncodeSACKBlocks(sackBlocks []SACKBlock, b []byte) int {
   641  	if len(sackBlocks) == 0 {
   642  		return 0
   643  	}
   644  	l := len(sackBlocks)
   645  	if l > TCPMaxSACKBlocks {
   646  		l = TCPMaxSACKBlocks
   647  	}
   648  	if ll := (len(b) - 2) / 8; ll < l {
   649  		l = ll
   650  	}
   651  	if l == 0 {
   652  		// There is not enough space in the provided buffer to add
   653  		// any SACK blocks.
   654  		return 0
   655  	}
   656  	b[0] = TCPOptionSACK
   657  	b[1] = byte(l*8 + 2)
   658  	for i := 0; i < l; i++ {
   659  		binary.BigEndian.PutUint32(b[i*8+2:], uint32(sackBlocks[i].Start))
   660  		binary.BigEndian.PutUint32(b[i*8+6:], uint32(sackBlocks[i].End))
   661  	}
   662  	return int(b[1])
   663  }
   664  
   665  // EncodeNOP adds an explicit NOP to the option list.
   666  func EncodeNOP(b []byte) int {
   667  	if len(b) == 0 {
   668  		return 0
   669  	}
   670  	b[0] = TCPOptionNOP
   671  	return 1
   672  }
   673  
   674  // AddTCPOptionPadding adds the required number of TCPOptionNOP to quad align
   675  // the option buffer. It adds padding bytes after the offset specified and
   676  // returns the number of padding bytes added. The passed in options slice
   677  // must have space for the padding bytes.
   678  func AddTCPOptionPadding(options []byte, offset int) int {
   679  	paddingToAdd := -offset & 3
   680  	// Now add any padding bytes that might be required to quad align the
   681  	// options.
   682  	for i := offset; i < offset+paddingToAdd; i++ {
   683  		options[i] = TCPOptionNOP
   684  	}
   685  	return paddingToAdd
   686  }
   687  
   688  // Acceptable checks if a segment that starts at segSeq and has length segLen is
   689  // "acceptable" for arriving in a receive window that starts at rcvNxt and ends
   690  // before rcvAcc, according to the table on page 26 and 69 of RFC 793.
   691  func Acceptable(segSeq seqnum.Value, segLen seqnum.Size, rcvNxt, rcvAcc seqnum.Value) bool {
   692  	if rcvNxt == rcvAcc {
   693  		return segLen == 0 && segSeq == rcvNxt
   694  	}
   695  	if segLen == 0 {
   696  		// rcvWnd is incremented by 1 because that is Linux's behavior despite the
   697  		// RFC.
   698  		return segSeq.InRange(rcvNxt, rcvAcc.Add(1))
   699  	}
   700  	// Page 70 of RFC 793 allows packets that can be made "acceptable" by trimming
   701  	// the payload, so we'll accept any payload that overlaps the receive window.
   702  	// segSeq < rcvAcc is more correct according to RFC, however, Linux does it
   703  	// differently, it uses segSeq <= rcvAcc, we'd want to keep the same behavior
   704  	// as Linux.
   705  	return rcvNxt.LessThan(segSeq.Add(segLen)) && segSeq.LessThanEq(rcvAcc)
   706  }
   707  
   708  // TCPValid returns true if the pkt has a valid TCP header. It checks whether:
   709  //   - The data offset is too small.
   710  //   - The data offset is too large.
   711  //   - The checksum is invalid.
   712  //
   713  // TCPValid corresponds to net/netfilter/nf_conntrack_proto_tcp.c:tcp_error.
   714  func TCPValid(hdr TCP, payloadChecksum func() uint16, payloadSize uint16, srcAddr, dstAddr tcpip.Address, skipChecksumValidation bool) (csum uint16, csumValid, ok bool) {
   715  	if offset := int(hdr.DataOffset()); offset < TCPMinimumSize || offset > len(hdr) {
   716  		return
   717  	}
   718  
   719  	if skipChecksumValidation {
   720  		csumValid = true
   721  	} else {
   722  		csum = hdr.Checksum()
   723  		csumValid = hdr.IsChecksumValid(srcAddr, dstAddr, payloadChecksum(), payloadSize)
   724  	}
   725  	return csum, csumValid, true
   726  }