github.com/polevpn/netstack@v1.10.9/tcpip/header/tcp.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package header
    16  
    17  import (
    18  	"encoding/binary"
    19  
    20  	"github.com/google/btree"
    21  	"github.com/polevpn/netstack/tcpip"
    22  	"github.com/polevpn/netstack/tcpip/seqnum"
    23  )
    24  
// These constants are the byte offsets of the respective fields in the TCP
// header, measured from the first byte of the header. The data-offset and
// flags fields are accessed as single bytes; the other fields are read and
// written in big-endian byte order.
const (
	TCPSrcPortOffset   = 0
	TCPDstPortOffset   = 2
	TCPSeqNumOffset    = 4
	TCPAckNumOffset    = 8
	TCPDataOffset      = 12
	TCPFlagsOffset     = 13
	TCPWinSizeOffset   = 14
	TCPChecksumOffset  = 16
	TCPUrgentPtrOffset = 18
)
    37  
const (
	// MaxWndScale is maximum allowed window scaling, as described in
	// RFC 1323, section 2.3, page 11. ParseSynOptions clamps any larger
	// window scale value received from a peer down to this limit.
	MaxWndScale = 14

	// TCPMaxSACKBlocks is the maximum number of SACK blocks that can
	// be encoded in a TCP option field. EncodeSACKBlocks never writes
	// more blocks than this.
	TCPMaxSACKBlocks = 4
)
    47  
// Flags that may be set in a TCP segment. Each flag occupies its own bit of
// the flags byte, starting with FIN in the least significant bit, so flags
// can be combined with bitwise OR and tested with bitwise AND.
const (
	TCPFlagFin = 1 << iota
	TCPFlagSyn
	TCPFlagRst
	TCPFlagPsh
	TCPFlagAck
	TCPFlagUrg
)
    57  
// Options that may be present in a TCP segment. Each value is the option
// "kind" byte that starts the option in the options area of the header.
// EOL and NOP are single-byte options; every other option is followed by a
// length byte that counts the kind and length bytes themselves.
const (
	TCPOptionEOL           = 0
	TCPOptionNOP           = 1
	TCPOptionMSS           = 2
	TCPOptionWS            = 3
	TCPOptionTS            = 8
	TCPOptionSACKPermitted = 4
	TCPOptionSACK          = 5
)
    68  
// TCPFields contains the fields of a TCP packet. It is used to describe the
// fields of a packet that needs to be encoded; TCP.Encode writes every field
// of this struct into a header buffer.
type TCPFields struct {
	// SrcPort is the "source port" field of a TCP packet.
	SrcPort uint16

	// DstPort is the "destination port" field of a TCP packet.
	DstPort uint16

	// SeqNum is the "sequence number" field of a TCP packet.
	SeqNum uint32

	// AckNum is the "acknowledgement number" field of a TCP packet.
	AckNum uint32

	// DataOffset is the "data offset" field of a TCP packet. It is
	// expressed in bytes: Encode divides it by 4 before storing it in the
	// upper four bits of the data-offset byte.
	DataOffset uint8

	// Flags is the "flags" field of a TCP packet.
	Flags uint8

	// WindowSize is the "window size" field of a TCP packet.
	WindowSize uint16

	// Checksum is the "checksum" field of a TCP packet.
	Checksum uint16

	// UrgentPointer is the "urgent pointer" field of a TCP packet.
	UrgentPointer uint16
}
    99  
// TCPSynOptions is used to return the parsed TCP Options in a syn
// segment. It is the result type of ParseSynOptions.
type TCPSynOptions struct {
	// MSS is the maximum segment size provided by the peer in the SYN.
	// ParseSynOptions reports TCPDefaultMSS when no valid MSS option is
	// present.
	MSS uint16

	// WS is the window scale option provided by the peer in the SYN.
	// ParseSynOptions caps the value at MaxWndScale.
	//
	// Set to -1 if no window scale option was provided.
	WS int

	// TS is true if the timestamp option was provided in the syn/syn-ack.
	TS bool

	// TSVal is the value of the TSVal field in the timestamp option.
	TSVal uint32

	// TSEcr is the value of the TSEcr field in the timestamp option.
	// ParseSynOptions only fills this in when parsing a SYN-ACK.
	TSEcr uint32

	// SACKPermitted is true if the SACK option was provided in the SYN/SYN-ACK.
	SACKPermitted bool
}
   123  
// SACKBlock represents a single contiguous SACK block covering the sequence
// numbers from Start (inclusive) up to End (exclusive).
//
// +stateify savable
type SACKBlock struct {
	// Start indicates the lowest sequence number in the block.
	Start seqnum.Value

	// End indicates the sequence number immediately following the last
	// sequence number of this block.
	End seqnum.Value
}
   135  
   136  // Less returns true if r.Start < b.Start.
   137  func (r SACKBlock) Less(b btree.Item) bool {
   138  	return r.Start.LessThan(b.(SACKBlock).Start)
   139  }
   140  
   141  // Contains returns true if b is completely contained in r.
   142  func (r SACKBlock) Contains(b SACKBlock) bool {
   143  	return r.Start.LessThanEq(b.Start) && b.End.LessThanEq(r.End)
   144  }
   145  
// TCPOptions are used to parse and cache the TCP segment options for a non
// syn/syn-ack segment. It is the result type of ParseTCPOptions.
//
// +stateify savable
type TCPOptions struct {
	// TS is true if the TimeStamp option is enabled.
	TS bool

	// TSVal is the value in the TSVal field of the segment.
	TSVal uint32

	// TSEcr is the value in the TSEcr field of the segment.
	TSEcr uint32

	// SACKBlocks are the SACK blocks specified in the segment. It is nil
	// when the parsed options contain no well-formed SACK option.
	SACKBlocks []SACKBlock
}
   163  
// TCP represents a TCP header stored in a byte slice. The accessor methods
// index directly into the slice and perform no length validation, so the
// slice must be long enough to hold every field that is accessed.
type TCP []byte
   166  
const (
	// TCPMinimumSize is the minimum size of a valid TCP packet, i.e. the
	// size in bytes of a TCP header that carries no options.
	TCPMinimumSize = 20

	// TCPOptionsMaximumSize is the maximum size of TCP options, in bytes.
	TCPOptionsMaximumSize = 40

	// TCPHeaderMaximumSize is the maximum header size of a TCP packet.
	TCPHeaderMaximumSize = TCPMinimumSize + TCPOptionsMaximumSize

	// TCPProtocolNumber is TCP's transport protocol number.
	TCPProtocolNumber tcpip.TransportProtocolNumber = 6

	// TCPMinimumMSS is the minimum acceptable value for MSS. This is the
	// same as the value TCP_MIN_MSS defined in net/tcp.h.
	TCPMinimumMSS = IPv4MaximumHeaderSize + TCPHeaderMaximumSize + MinIPFragmentPayloadSize - IPv4MinimumSize - TCPMinimumSize

	// TCPMaximumMSS is the maximum acceptable value for MSS.
	TCPMaximumMSS = 0xffff

	// TCPDefaultMSS is the MSS value that should be used if an MSS option
	// is not received from the peer. It's also the value returned by
	// TCP_MAXSEG option for a socket in an unconnected state.
	//
	// Per RFC 1122, page 85: "If an MSS option is not received at
	// connection setup, TCP MUST assume a default send MSS of 536."
	TCPDefaultMSS = 536
)
   195  
   196  // SourcePort returns the "source port" field of the tcp header.
   197  func (b TCP) SourcePort() uint16 {
   198  	return binary.BigEndian.Uint16(b[TCPSrcPortOffset:])
   199  }
   200  
   201  // DestinationPort returns the "destination port" field of the tcp header.
   202  func (b TCP) DestinationPort() uint16 {
   203  	return binary.BigEndian.Uint16(b[TCPDstPortOffset:])
   204  }
   205  
   206  // SequenceNumber returns the "sequence number" field of the tcp header.
   207  func (b TCP) SequenceNumber() uint32 {
   208  	return binary.BigEndian.Uint32(b[TCPSeqNumOffset:])
   209  }
   210  
   211  // AckNumber returns the "ack number" field of the tcp header.
   212  func (b TCP) AckNumber() uint32 {
   213  	return binary.BigEndian.Uint32(b[TCPAckNumOffset:])
   214  }
   215  
   216  // DataOffset returns the "data offset" field of the tcp header.
   217  func (b TCP) DataOffset() uint8 {
   218  	return (b[TCPDataOffset] >> 4) * 4
   219  }
   220  
   221  // Payload returns the data in the tcp packet.
   222  func (b TCP) Payload() []byte {
   223  	return b[b.DataOffset():]
   224  }
   225  
   226  // Flags returns the flags field of the tcp header.
   227  func (b TCP) Flags() uint8 {
   228  	return b[TCPFlagsOffset]
   229  }
   230  
   231  // WindowSize returns the "window size" field of the tcp header.
   232  func (b TCP) WindowSize() uint16 {
   233  	return binary.BigEndian.Uint16(b[TCPWinSizeOffset:])
   234  }
   235  
   236  // Checksum returns the "checksum" field of the tcp header.
   237  func (b TCP) Checksum() uint16 {
   238  	return binary.BigEndian.Uint16(b[TCPChecksumOffset:])
   239  }
   240  
   241  // SetSourcePort sets the "source port" field of the tcp header.
   242  func (b TCP) SetSourcePort(port uint16) {
   243  	binary.BigEndian.PutUint16(b[TCPSrcPortOffset:], port)
   244  }
   245  
   246  // SetDestinationPort sets the "destination port" field of the tcp header.
   247  func (b TCP) SetDestinationPort(port uint16) {
   248  	binary.BigEndian.PutUint16(b[TCPDstPortOffset:], port)
   249  }
   250  
   251  // SetChecksum sets the checksum field of the tcp header.
   252  func (b TCP) SetChecksum(checksum uint16) {
   253  	binary.BigEndian.PutUint16(b[TCPChecksumOffset:], checksum)
   254  }
   255  
   256  // CalculateChecksum calculates the checksum of the tcp segment.
   257  // partialChecksum is the checksum of the network-layer pseudo-header
   258  // and the checksum of the segment data.
   259  func (b TCP) CalculateChecksum(partialChecksum uint16) uint16 {
   260  	// Calculate the rest of the checksum.
   261  	return Checksum(b[:b.DataOffset()], partialChecksum)
   262  }
   263  
   264  // Options returns a slice that holds the unparsed TCP options in the segment.
   265  func (b TCP) Options() []byte {
   266  	return b[TCPMinimumSize:b.DataOffset()]
   267  }
   268  
   269  // ParsedOptions returns a TCPOptions structure which parses and caches the TCP
   270  // option values in the TCP segment. NOTE: Invoking this function repeatedly is
   271  // expensive as it reparses the options on each invocation.
   272  func (b TCP) ParsedOptions() TCPOptions {
   273  	return ParseTCPOptions(b.Options())
   274  }
   275  
   276  func (b TCP) encodeSubset(seq, ack uint32, flags uint8, rcvwnd uint16) {
   277  	binary.BigEndian.PutUint32(b[TCPSeqNumOffset:], seq)
   278  	binary.BigEndian.PutUint32(b[TCPAckNumOffset:], ack)
   279  	b[TCPFlagsOffset] = flags
   280  	binary.BigEndian.PutUint16(b[TCPWinSizeOffset:], rcvwnd)
   281  }
   282  
   283  // Encode encodes all the fields of the tcp header.
   284  func (b TCP) Encode(t *TCPFields) {
   285  	b.encodeSubset(t.SeqNum, t.AckNum, t.Flags, t.WindowSize)
   286  	binary.BigEndian.PutUint16(b[TCPSrcPortOffset:], t.SrcPort)
   287  	binary.BigEndian.PutUint16(b[TCPDstPortOffset:], t.DstPort)
   288  	b[TCPDataOffset] = (t.DataOffset / 4) << 4
   289  	binary.BigEndian.PutUint16(b[TCPChecksumOffset:], t.Checksum)
   290  	binary.BigEndian.PutUint16(b[TCPUrgentPtrOffset:], t.UrgentPointer)
   291  }
   292  
   293  // EncodePartial updates a subset of the fields of the tcp header. It is useful
   294  // in cases when similar segments are produced.
   295  func (b TCP) EncodePartial(partialChecksum, length uint16, seqnum, acknum uint32, flags byte, rcvwnd uint16) {
   296  	// Add the total length and "flags" field contributions to the checksum.
   297  	// We don't use the flags field directly from the header because it's a
   298  	// one-byte field with an odd offset, so it would be accounted for
   299  	// incorrectly by the Checksum routine.
   300  	tmp := make([]byte, 4)
   301  	binary.BigEndian.PutUint16(tmp, length)
   302  	binary.BigEndian.PutUint16(tmp[2:], uint16(flags))
   303  	checksum := Checksum(tmp, partialChecksum)
   304  
   305  	// Encode the passed-in fields.
   306  	b.encodeSubset(seqnum, acknum, flags, rcvwnd)
   307  
   308  	// Add the contributions of the passed-in fields to the checksum.
   309  	checksum = Checksum(b[TCPSeqNumOffset:TCPSeqNumOffset+8], checksum)
   310  	checksum = Checksum(b[TCPWinSizeOffset:TCPWinSizeOffset+2], checksum)
   311  
   312  	// Encode the checksum.
   313  	b.SetChecksum(^checksum)
   314  }
   315  
   316  // ParseSynOptions parses the options received in a SYN segment and returns the
   317  // relevant ones. opts should point to the option part of the TCP Header.
   318  func ParseSynOptions(opts []byte, isAck bool) TCPSynOptions {
   319  	limit := len(opts)
   320  
   321  	synOpts := TCPSynOptions{
   322  		// Per RFC 1122, page 85: "If an MSS option is not received at
   323  		// connection setup, TCP MUST assume a default send MSS of 536."
   324  		MSS: TCPDefaultMSS,
   325  		// If no window scale option is specified, WS in options is
   326  		// returned as -1; this is because the absence of the option
   327  		// indicates that the we cannot use window scaling on the
   328  		// receive end either.
   329  		WS: -1,
   330  	}
   331  
   332  	for i := 0; i < limit; {
   333  		switch opts[i] {
   334  		case TCPOptionEOL:
   335  			i = limit
   336  		case TCPOptionNOP:
   337  			i++
   338  		case TCPOptionMSS:
   339  			if i+4 > limit || opts[i+1] != 4 {
   340  				return synOpts
   341  			}
   342  			mss := uint16(opts[i+2])<<8 | uint16(opts[i+3])
   343  			if mss == 0 {
   344  				return synOpts
   345  			}
   346  			synOpts.MSS = mss
   347  			i += 4
   348  
   349  		case TCPOptionWS:
   350  			if i+3 > limit || opts[i+1] != 3 {
   351  				return synOpts
   352  			}
   353  			ws := int(opts[i+2])
   354  			if ws > MaxWndScale {
   355  				ws = MaxWndScale
   356  			}
   357  			synOpts.WS = ws
   358  			i += 3
   359  
   360  		case TCPOptionTS:
   361  			if i+10 > limit || opts[i+1] != 10 {
   362  				return synOpts
   363  			}
   364  			synOpts.TSVal = binary.BigEndian.Uint32(opts[i+2:])
   365  			if isAck {
   366  				// If the segment is a SYN-ACK then store the Timestamp Echo Reply
   367  				// in the segment.
   368  				synOpts.TSEcr = binary.BigEndian.Uint32(opts[i+6:])
   369  			}
   370  			synOpts.TS = true
   371  			i += 10
   372  		case TCPOptionSACKPermitted:
   373  			if i+2 > limit || opts[i+1] != 2 {
   374  				return synOpts
   375  			}
   376  			synOpts.SACKPermitted = true
   377  			i += 2
   378  
   379  		default:
   380  			// We don't recognize this option, just skip over it.
   381  			if i+2 > limit {
   382  				return synOpts
   383  			}
   384  			l := int(opts[i+1])
   385  			// If the length is incorrect or if l+i overflows the
   386  			// total options length then return false.
   387  			if l < 2 || i+l > limit {
   388  				return synOpts
   389  			}
   390  			i += l
   391  		}
   392  	}
   393  
   394  	return synOpts
   395  }
   396  
   397  // ParseTCPOptions extracts and stores all known options in the provided byte
   398  // slice in a TCPOptions structure.
   399  func ParseTCPOptions(b []byte) TCPOptions {
   400  	opts := TCPOptions{}
   401  	limit := len(b)
   402  	for i := 0; i < limit; {
   403  		switch b[i] {
   404  		case TCPOptionEOL:
   405  			i = limit
   406  		case TCPOptionNOP:
   407  			i++
   408  		case TCPOptionTS:
   409  			if i+10 > limit || (b[i+1] != 10) {
   410  				return opts
   411  			}
   412  			opts.TS = true
   413  			opts.TSVal = binary.BigEndian.Uint32(b[i+2:])
   414  			opts.TSEcr = binary.BigEndian.Uint32(b[i+6:])
   415  			i += 10
   416  		case TCPOptionSACK:
   417  			if i+2 > limit {
   418  				// Malformed SACK block, just return and stop parsing.
   419  				return opts
   420  			}
   421  			sackOptionLen := int(b[i+1])
   422  			if i+sackOptionLen > limit || (sackOptionLen-2)%8 != 0 {
   423  				// Malformed SACK block, just return and stop parsing.
   424  				return opts
   425  			}
   426  			numBlocks := (sackOptionLen - 2) / 8
   427  			opts.SACKBlocks = []SACKBlock{}
   428  			for j := 0; j < numBlocks; j++ {
   429  				start := binary.BigEndian.Uint32(b[i+2+j*8:])
   430  				end := binary.BigEndian.Uint32(b[i+2+j*8+4:])
   431  				opts.SACKBlocks = append(opts.SACKBlocks, SACKBlock{
   432  					Start: seqnum.Value(start),
   433  					End:   seqnum.Value(end),
   434  				})
   435  			}
   436  			i += sackOptionLen
   437  		default:
   438  			// We don't recognize this option, just skip over it.
   439  			if i+2 > limit {
   440  				return opts
   441  			}
   442  			l := int(b[i+1])
   443  			// If the length is incorrect or if l+i overflows the
   444  			// total options length then return false.
   445  			if l < 2 || i+l > limit {
   446  				return opts
   447  			}
   448  			i += l
   449  		}
   450  	}
   451  	return opts
   452  }
   453  
   454  // EncodeMSSOption encodes the MSS TCP option with the provided MSS values in
   455  // the supplied buffer. If the provided buffer is not large enough then it just
   456  // returns without encoding anything. It returns the number of bytes written to
   457  // the provided buffer.
   458  func EncodeMSSOption(mss uint32, b []byte) int {
   459  	// mssOptionSize is the number of bytes in a valid MSS option.
   460  	const mssOptionSize = 4
   461  
   462  	if len(b) < mssOptionSize {
   463  		return 0
   464  	}
   465  	b[0], b[1], b[2], b[3] = TCPOptionMSS, mssOptionSize, byte(mss>>8), byte(mss)
   466  	return mssOptionSize
   467  }
   468  
   469  // EncodeWSOption encodes the WS TCP option with the WS value in the
   470  // provided buffer. If the provided buffer is not large enough then it just
   471  // returns without encoding anything. It returns the number of bytes written to
   472  // the provided buffer.
   473  func EncodeWSOption(ws int, b []byte) int {
   474  	if len(b) < 3 {
   475  		return 0
   476  	}
   477  	b[0], b[1], b[2] = TCPOptionWS, 3, uint8(ws)
   478  	return int(b[1])
   479  }
   480  
   481  // EncodeTSOption encodes the provided tsVal and tsEcr values as a TCP timestamp
   482  // option into the provided buffer. If the buffer is smaller than expected it
   483  // just returns without encoding anything. It returns the number of bytes
   484  // written to the provided buffer.
   485  func EncodeTSOption(tsVal, tsEcr uint32, b []byte) int {
   486  	if len(b) < 10 {
   487  		return 0
   488  	}
   489  	b[0], b[1] = TCPOptionTS, 10
   490  	binary.BigEndian.PutUint32(b[2:], tsVal)
   491  	binary.BigEndian.PutUint32(b[6:], tsEcr)
   492  	return int(b[1])
   493  }
   494  
   495  // EncodeSACKPermittedOption encodes a SACKPermitted option into the provided
   496  // buffer. If the buffer is smaller than required it just returns without
   497  // encoding anything. It returns the number of bytes written to the provided
   498  // buffer.
   499  func EncodeSACKPermittedOption(b []byte) int {
   500  	if len(b) < 2 {
   501  		return 0
   502  	}
   503  
   504  	b[0], b[1] = TCPOptionSACKPermitted, 2
   505  	return int(b[1])
   506  }
   507  
   508  // EncodeSACKBlocks encodes the provided SACK blocks as a TCP SACK option block
   509  // in the provided slice. It tries to fit in as many blocks as possible based on
   510  // number of bytes available in the provided buffer. It returns the number of
   511  // bytes written to the provided buffer.
   512  func EncodeSACKBlocks(sackBlocks []SACKBlock, b []byte) int {
   513  	if len(sackBlocks) == 0 {
   514  		return 0
   515  	}
   516  	l := len(sackBlocks)
   517  	if l > TCPMaxSACKBlocks {
   518  		l = TCPMaxSACKBlocks
   519  	}
   520  	if ll := (len(b) - 2) / 8; ll < l {
   521  		l = ll
   522  	}
   523  	if l == 0 {
   524  		// There is not enough space in the provided buffer to add
   525  		// any SACK blocks.
   526  		return 0
   527  	}
   528  	b[0] = TCPOptionSACK
   529  	b[1] = byte(l*8 + 2)
   530  	for i := 0; i < l; i++ {
   531  		binary.BigEndian.PutUint32(b[i*8+2:], uint32(sackBlocks[i].Start))
   532  		binary.BigEndian.PutUint32(b[i*8+6:], uint32(sackBlocks[i].End))
   533  	}
   534  	return int(b[1])
   535  }
   536  
   537  // EncodeNOP adds an explicit NOP to the option list.
   538  func EncodeNOP(b []byte) int {
   539  	if len(b) == 0 {
   540  		return 0
   541  	}
   542  	b[0] = TCPOptionNOP
   543  	return 1
   544  }
   545  
   546  // AddTCPOptionPadding adds the required number of TCPOptionNOP to quad align
   547  // the option buffer. It adds padding bytes after the offset specified and
   548  // returns the number of padding bytes added. The passed in options slice
   549  // must have space for the padding bytes.
   550  func AddTCPOptionPadding(options []byte, offset int) int {
   551  	paddingToAdd := -offset & 3
   552  	// Now add any padding bytes that might be required to quad align the
   553  	// options.
   554  	for i := offset; i < offset+paddingToAdd; i++ {
   555  		options[i] = TCPOptionNOP
   556  	}
   557  	return paddingToAdd
   558  }