github.com/noisysockets/netstack@v0.6.0/pkg/tcpip/transport/tcpconntrack/tcp_conntrack.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package tcpconntrack implements a TCP connection tracking object. It allows
    16  // users with access to a segment stream to figure out when a connection is
    17  // established, reset, and closed (and in the last case, who closed first).
    18  package tcpconntrack
    19  
    20  import (
    21  	"github.com/noisysockets/netstack/pkg/tcpip/header"
    22  	"github.com/noisysockets/netstack/pkg/tcpip/seqnum"
    23  )
    24  
    25  // Result is returned when the state of a TCB is updated in response to a
    26  // segment.
    27  type Result int
    28  
    29  const (
    30  	// ResultDrop indicates that the segment should be dropped.
    31  	ResultDrop Result = iota
    32  
    33  	// ResultConnecting indicates that the connection remains in a
    34  	// connecting state.
    35  	ResultConnecting
    36  
    37  	// ResultAlive indicates that the connection remains alive (connected).
    38  	ResultAlive
    39  
    40  	// ResultReset indicates that the connection was reset.
    41  	ResultReset
    42  
    43  	// ResultClosedByResponder indicates that the connection was gracefully
    44  	// closed, and the reply stream was closed first.
    45  	ResultClosedByResponder
    46  
    47  	// ResultClosedByOriginator indicates that the connection was gracefully
    48  	// closed, and the original stream was closed first.
    49  	ResultClosedByOriginator
    50  )
    51  
    52  // maxWindowShift is the maximum shift value of the per the windows scale
    53  // option defined by RFC 1323.
    54  const maxWindowShift = 14
    55  
    56  // TCB is a TCP Control Block. It holds state necessary to keep track of a TCP
    57  // connection and inform the caller when the connection has been closed.
    58  type TCB struct {
    59  	reply    stream
    60  	original stream
    61  
    62  	// State handlers. hdr is not guaranteed to contain bytes beyond the TCP
    63  	// header itself, i.e. it may not contain the payload.
    64  	handlerReply    func(tcb *TCB, hdr header.TCP, dataLen int) Result
    65  	handlerOriginal func(tcb *TCB, hdr header.TCP, dataLen int) Result
    66  
    67  	// firstFin holds a pointer to the first stream to send a FIN.
    68  	firstFin *stream
    69  
    70  	// state is the current state of the stream.
    71  	state Result
    72  }
    73  
    74  // Init initializes the state of the TCB according to the initial SYN.
    75  func (t *TCB) Init(initialSyn header.TCP, dataLen int) Result {
    76  	t.handlerReply = synSentStateReply
    77  	t.handlerOriginal = synSentStateOriginal
    78  
    79  	iss := seqnum.Value(initialSyn.SequenceNumber())
    80  	t.original.una = iss
    81  	t.original.nxt = iss.Add(logicalLenSyn(initialSyn, dataLen))
    82  	t.original.end = t.original.nxt
    83  	// TODO(gvisor.dev/issue/6734): Cache TCP options instead of re-parsing them.
    84  	// Because original and reply are streams, scale applies to the reply; it is
    85  	// the receive window in the reply direction.
    86  	t.reply.shiftCnt = header.ParseSynOptions(initialSyn.Options(), false /* isAck */).WS
    87  
    88  	// Even though "end" is a sequence number, we don't know the initial
    89  	// receive sequence number yet, so we store the window size until we get
    90  	// a SYN from the server.
    91  	t.reply.una = 0
    92  	t.reply.nxt = 0
    93  	t.reply.end = seqnum.Value(initialSyn.WindowSize())
    94  	t.state = ResultConnecting
    95  	return t.state
    96  }
    97  
    98  // UpdateStateReply updates the state of the TCB based on the supplied reply
    99  // segment.
   100  func (t *TCB) UpdateStateReply(tcp header.TCP, dataLen int) Result {
   101  	st := t.handlerReply(t, tcp, dataLen)
   102  	if st != ResultDrop {
   103  		t.state = st
   104  	}
   105  	return st
   106  }
   107  
   108  // UpdateStateOriginal updates the state of the TCB based on the supplied
   109  // original segment.
   110  func (t *TCB) UpdateStateOriginal(tcp header.TCP, dataLen int) Result {
   111  	st := t.handlerOriginal(t, tcp, dataLen)
   112  	if st != ResultDrop {
   113  		t.state = st
   114  	}
   115  	return st
   116  }
   117  
   118  // State returns the current state of the TCB.
   119  func (t *TCB) State() Result {
   120  	return t.state
   121  }
   122  
   123  // IsAlive returns true as long as the connection is established(Alive)
   124  // or connecting state.
   125  func (t *TCB) IsAlive() bool {
   126  	return !t.reply.rstSeen && !t.original.rstSeen && (!t.reply.closed() || !t.original.closed())
   127  }
   128  
   129  // OriginalSendSequenceNumber returns the snd.NXT for the original stream.
   130  func (t *TCB) OriginalSendSequenceNumber() seqnum.Value {
   131  	return t.original.nxt
   132  }
   133  
   134  // ReplySendSequenceNumber returns the snd.NXT for the reply stream.
   135  func (t *TCB) ReplySendSequenceNumber() seqnum.Value {
   136  	return t.reply.nxt
   137  }
   138  
   139  // adapResult modifies the supplied "Result" according to the state of the TCB;
   140  // if r is anything other than "Alive", or if one of the streams isn't closed
   141  // yet, it is returned unmodified. Otherwise it's converted to either
   142  // ClosedByOriginator or ClosedByResponder depending on which stream was closed
   143  // first.
   144  func (t *TCB) adaptResult(r Result) Result {
   145  	// Check the unmodified case.
   146  	if r != ResultAlive || !t.reply.closed() || !t.original.closed() {
   147  		return r
   148  	}
   149  
   150  	// Find out which was closed first.
   151  	if t.firstFin == &t.original {
   152  		return ResultClosedByOriginator
   153  	}
   154  
   155  	return ResultClosedByResponder
   156  }
   157  
   158  // synSentStateReply is the state handler for reply segments when the
   159  // connection is in SYN-SENT state.
   160  func synSentStateReply(t *TCB, tcp header.TCP, dataLen int) Result {
   161  	flags := tcp.Flags()
   162  	ackPresent := flags&header.TCPFlagAck != 0
   163  	ack := seqnum.Value(tcp.AckNumber())
   164  
   165  	// Ignore segment if ack is present but not acceptable.
   166  	if ackPresent && !(ack-1).InRange(t.original.una, t.original.nxt) {
   167  		return ResultConnecting
   168  	}
   169  
   170  	// If reset is specified, we will let the packet through no matter what
   171  	// but we will also destroy the connection if the ACK is present (and
   172  	// implicitly acceptable).
   173  	if flags&header.TCPFlagRst != 0 {
   174  		if ackPresent {
   175  			t.reply.rstSeen = true
   176  			return ResultReset
   177  		}
   178  		return ResultConnecting
   179  	}
   180  
   181  	// Ignore segment if SYN is not set.
   182  	if flags&header.TCPFlagSyn == 0 {
   183  		return ResultConnecting
   184  	}
   185  
   186  	// TODO(gvisor.dev/issue/6734): Cache TCP options instead of re-parsing them.
   187  	// Because original and reply are streams, scale applies to the reply; it is
   188  	// the receive window in the original direction.
   189  	t.original.shiftCnt = header.ParseSynOptions(tcp.Options(), ackPresent).WS
   190  
   191  	// Window scaling works only when both ends use the scale option.
   192  	if t.original.shiftCnt != -1 && t.reply.shiftCnt != -1 {
   193  		// Per RFC 1323 section 2.3:
   194  		//
   195  		//  "If a Window Scale option is received with a shift.cnt value exceeding
   196  		//  14, the TCP should log the error but use 14 instead of the specified
   197  		//  value."
   198  		if t.original.shiftCnt > maxWindowShift {
   199  			t.original.shiftCnt = maxWindowShift
   200  		}
   201  		if t.reply.shiftCnt > maxWindowShift {
   202  			t.original.shiftCnt = maxWindowShift
   203  		}
   204  	} else {
   205  		t.original.shiftCnt = 0
   206  		t.reply.shiftCnt = 0
   207  	}
   208  	// Update state informed by this SYN.
   209  	irs := seqnum.Value(tcp.SequenceNumber())
   210  	t.reply.una = irs
   211  	t.reply.nxt = irs.Add(logicalLen(tcp, dataLen, seqnum.Size(t.reply.end) /* end currently holds the receive window size */))
   212  	t.reply.end <<= t.reply.shiftCnt
   213  	t.reply.end.UpdateForward(seqnum.Size(irs))
   214  
   215  	windowSize := t.original.windowSize(tcp)
   216  	t.original.end = t.original.una.Add(windowSize)
   217  
   218  	// If the ACK was set (it is acceptable), update our unacknowledgement
   219  	// tracking.
   220  	if ackPresent {
   221  		// Advance the "una" and "end" indices of the original stream.
   222  		if t.original.una.LessThan(ack) {
   223  			t.original.una = ack
   224  		}
   225  
   226  		if end := ack.Add(seqnum.Size(windowSize)); t.original.end.LessThan(end) {
   227  			t.original.end = end
   228  		}
   229  	}
   230  
   231  	// Update handlers so that new calls will be handled by new state.
   232  	t.handlerReply = allOtherReply
   233  	t.handlerOriginal = allOtherOriginal
   234  
   235  	return ResultAlive
   236  }
   237  
   238  // synSentStateOriginal is the state handler for original segments when the
   239  // connection is in SYN-SENT state.
   240  func synSentStateOriginal(t *TCB, tcp header.TCP, _ int) Result {
   241  	// Drop original segments that aren't retransmits of the original one.
   242  	if tcp.Flags() != header.TCPFlagSyn || tcp.SequenceNumber() != uint32(t.original.una) {
   243  		return ResultDrop
   244  	}
   245  
   246  	// Update the receive window. We only remember the largest value seen.
   247  	if wnd := seqnum.Value(tcp.WindowSize()); wnd > t.reply.end {
   248  		t.reply.end = wnd
   249  	}
   250  
   251  	return ResultConnecting
   252  }
   253  
   254  // update updates the state of reply and original streams, given the supplied
   255  // reply segment. For original segments, this same function can be called with
   256  // swapped reply/original streams.
   257  func update(tcp header.TCP, reply, original *stream, firstFin **stream, dataLen int) Result {
   258  	// Ignore segments out of the window.
   259  	s := seqnum.Value(tcp.SequenceNumber())
   260  	if !reply.acceptable(s, seqnum.Size(dataLen)) {
   261  		return ResultAlive
   262  	}
   263  
   264  	flags := tcp.Flags()
   265  	if flags&header.TCPFlagRst != 0 {
   266  		reply.rstSeen = true
   267  		return ResultReset
   268  	}
   269  
   270  	// Ignore segments that don't have the ACK flag, and those with the SYN
   271  	// flag.
   272  	if flags&header.TCPFlagAck == 0 || flags&header.TCPFlagSyn != 0 {
   273  		return ResultAlive
   274  	}
   275  
   276  	// Ignore segments that acknowledge not yet sent data.
   277  	ack := seqnum.Value(tcp.AckNumber())
   278  	if original.nxt.LessThan(ack) {
   279  		return ResultAlive
   280  	}
   281  
   282  	// Advance the "una" and "end" indices of the original stream.
   283  	if original.una.LessThan(ack) {
   284  		original.una = ack
   285  	}
   286  
   287  	if end := ack.Add(original.windowSize(tcp)); original.end.LessThan(end) {
   288  		original.end = end
   289  	}
   290  
   291  	// Advance the "nxt" index of the reply stream.
   292  	end := s.Add(logicalLen(tcp, dataLen, reply.rwndSize()))
   293  	if reply.nxt.LessThan(end) {
   294  		reply.nxt = end
   295  	}
   296  
   297  	// Note the index of the FIN segment. And stash away a pointer to the
   298  	// first stream to see a FIN.
   299  	if flags&header.TCPFlagFin != 0 && !reply.finSeen {
   300  		reply.finSeen = true
   301  		reply.fin = end - 1
   302  
   303  		if *firstFin == nil {
   304  			*firstFin = reply
   305  		}
   306  	}
   307  
   308  	return ResultAlive
   309  }
   310  
   311  // allOtherReply is the state handler for reply segments in all states
   312  // except SYN-SENT.
   313  func allOtherReply(t *TCB, tcp header.TCP, dataLen int) Result {
   314  	return t.adaptResult(update(tcp, &t.reply, &t.original, &t.firstFin, dataLen))
   315  }
   316  
   317  // allOtherOriginal is the state handler for original segments in all states
   318  // except SYN-SENT.
   319  func allOtherOriginal(t *TCB, tcp header.TCP, dataLen int) Result {
   320  	return t.adaptResult(update(tcp, &t.original, &t.reply, &t.firstFin, dataLen))
   321  }
   322  
   323  // streams holds the state of a TCP unidirectional stream.
   324  type stream struct {
   325  	// The interval [una, end) is the allowed interval as defined by the
   326  	// receiver, i.e., anything less than una has already been acknowledged
   327  	// and anything greater than or equal to end is beyond the receiver
   328  	// window. The interval [una, nxt) is the acknowledgable range, whose
   329  	// right edge indicates the sequence number of the next byte to be sent
   330  	// by the sender, i.e., anything greater than or equal to nxt hasn't
   331  	// been sent yet.
   332  	una seqnum.Value
   333  	nxt seqnum.Value
   334  	end seqnum.Value
   335  
   336  	// finSeen indicates if a FIN has already been sent on this stream.
   337  	finSeen bool
   338  
   339  	// fin is the sequence number of the FIN. It is only valid after finSeen
   340  	// is set to true.
   341  	fin seqnum.Value
   342  
   343  	// rstSeen indicates if a RST has already been sent on this stream.
   344  	rstSeen bool
   345  
   346  	// shiftCnt is the shift of the window scale of the receiver of the stream,
   347  	// i.e. in a stream from A to B it is B's receive window scale. It cannot be
   348  	// greater than maxWindowScale.
   349  	shiftCnt int
   350  }
   351  
   352  // acceptable determines if the segment with the given sequence number and data
   353  // length is acceptable, i.e., if it's within the [una, end) window or, in case
   354  // the window is zero, if it's a packet with no payload and sequence number
   355  // equal to una.
   356  func (s *stream) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool {
   357  	return header.Acceptable(segSeq, segLen, s.una, s.end)
   358  }
   359  
   360  // closed determines if the stream has already been closed. This happens when
   361  // a FIN has been set by the sender and acknowledged by the receiver.
   362  func (s *stream) closed() bool {
   363  	return s.finSeen && s.fin.LessThan(s.una)
   364  }
   365  
   366  // rwndSize returns the stream's receive window size.
   367  func (s *stream) rwndSize() seqnum.Size {
   368  	return s.una.Size(s.end)
   369  }
   370  
   371  // windowSize returns the stream's window size accounting for scale.
   372  func (s *stream) windowSize(tcp header.TCP) seqnum.Size {
   373  	return seqnum.Size(tcp.WindowSize()) << s.shiftCnt
   374  }
   375  
   376  // logicalLenSyn calculates the logical length of a SYN (without ACK) segment.
   377  // It is similar to logicalLen, but does not impose a window size requirement
   378  // because of the SYN.
   379  func logicalLenSyn(tcp header.TCP, dataLen int) seqnum.Size {
   380  	length := seqnum.Size(dataLen)
   381  	flags := tcp.Flags()
   382  	if flags&header.TCPFlagSyn != 0 {
   383  		length++
   384  	}
   385  	if flags&header.TCPFlagFin != 0 {
   386  		length++
   387  	}
   388  	return length
   389  }
   390  
   391  // logicalLen calculates the logical length of the TCP segment.
   392  func logicalLen(tcp header.TCP, dataLen int, windowSize seqnum.Size) seqnum.Size {
   393  	// If the segment is too large, TCP trims the payload per RFC 793 page 70.
   394  	length := logicalLenSyn(tcp, dataLen)
   395  	if length > windowSize {
   396  		length = windowSize
   397  	}
   398  	return length
   399  }
   400  
   401  // IsEmpty returns true if tcb is not initialized.
   402  func (t *TCB) IsEmpty() bool {
   403  	if t.reply != (stream{}) || t.original != (stream{}) {
   404  		return false
   405  	}
   406  
   407  	if t.firstFin != nil || t.state != ResultDrop {
   408  		return false
   409  	}
   410  
   411  	return true
   412  }