github.com/flowerwrong/netstack@v0.0.0-20191009141956-e5848263af28/tcpip/transport/tcp/rcv.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tcp
    16  
    17  import (
    18  	"container/heap"
    19  	"time"
    20  
    21  	"github.com/FlowerWrong/netstack/tcpip/header"
    22  	"github.com/FlowerWrong/netstack/tcpip/seqnum"
    23  )
    24  
// receiver holds the state necessary to receive TCP segments and turn them
// into a stream of bytes.
//
// +stateify savable
type receiver struct {
	// ep is the endpoint this receiver delivers data to and sends ACKs
	// through.
	ep *endpoint

	// rcvNxt is the sequence number of the next byte the receiver expects
	// to consume.
	rcvNxt seqnum.Value

	// rcvAcc is one beyond the last acceptable sequence number. That is,
	// the "largest" sequence value that the receiver has announced to its
	// peer that it's willing to accept. This may be different than
	// rcvNxt + rcvWnd if the receive window is reduced; in that case we
	// have to reduce the window as we receive more data instead of
	// shrinking it.
	rcvAcc seqnum.Value

	// rcvWnd is the non-scaled receive window last advertised to the peer.
	rcvWnd seqnum.Size

	// rcvWndScale is the right shift applied to rcvWnd before the window
	// is handed to the sender (see getSendParams).
	rcvWndScale uint8

	// closed is set once a FIN has been consumed; segments arriving
	// afterwards are ignored by handleRcvdSegment.
	closed bool

	// pendingRcvdSegments is a heap of segments that were acceptable but
	// could not be consumed yet because earlier data is missing.
	// pendingBufUsed tracks the logical length of the segments queued in
	// that heap, and pendingBufSize is the limit above which no further
	// out-of-order segments are queued.
	pendingRcvdSegments segmentHeap
	pendingBufUsed      seqnum.Size
	pendingBufSize      seqnum.Size
}
    53  
    54  func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale uint8, pendingBufSize seqnum.Size) *receiver {
    55  	return &receiver{
    56  		ep:             ep,
    57  		rcvNxt:         irs + 1,
    58  		rcvAcc:         irs.Add(rcvWnd + 1),
    59  		rcvWnd:         rcvWnd,
    60  		rcvWndScale:    rcvWndScale,
    61  		pendingBufSize: pendingBufSize,
    62  	}
    63  }
    64  
    65  // acceptable checks if the segment sequence number range is acceptable
    66  // according to the table on page 26 of RFC 793.
    67  func (r *receiver) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool {
    68  	rcvWnd := r.rcvNxt.Size(r.rcvAcc)
    69  	if rcvWnd == 0 {
    70  		return segLen == 0 && segSeq == r.rcvNxt
    71  	}
    72  
    73  	return segSeq.InWindow(r.rcvNxt, rcvWnd) ||
    74  		seqnum.Overlap(r.rcvNxt, rcvWnd, segSeq, segLen)
    75  }
    76  
    77  // getSendParams returns the parameters needed by the sender when building
    78  // segments to send.
    79  func (r *receiver) getSendParams() (rcvNxt seqnum.Value, rcvWnd seqnum.Size) {
    80  	// Calculate the window size based on the available buffer space.
    81  	receiveBufferAvailable := r.ep.receiveBufferAvailable()
    82  	acc := r.rcvNxt.Add(seqnum.Size(receiveBufferAvailable))
    83  	if r.rcvAcc.LessThan(acc) {
    84  		r.rcvAcc = acc
    85  	}
    86  	// Stash away the non-scaled receive window as we use it for measuring
    87  	// receiver's estimated RTT.
    88  	r.rcvWnd = r.rcvNxt.Size(r.rcvAcc)
    89  	return r.rcvNxt, r.rcvWnd >> r.rcvWndScale
    90  }
    91  
    92  // nonZeroWindow is called when the receive window grows from zero to nonzero;
    93  // in such cases we may need to send an ack to indicate to our peer that it can
    94  // resume sending data.
    95  func (r *receiver) nonZeroWindow() {
    96  	if (r.rcvAcc-r.rcvNxt)>>r.rcvWndScale != 0 {
    97  		// We never got around to announcing a zero window size, so we
    98  		// don't need to immediately announce a nonzero one.
    99  		return
   100  	}
   101  
   102  	// Immediately send an ack.
   103  	r.ep.snd.sendAck()
   104  }
   105  
   106  // consumeSegment attempts to consume a segment that was received by r. The
   107  // segment may have just been received or may have been received earlier but
   108  // wasn't ready to be consumed then.
   109  //
   110  // Returns true if the segment was consumed, false if it cannot be consumed
   111  // yet because of a missing segment.
   112  func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum.Size) bool {
   113  	if segLen > 0 {
   114  		// If the segment doesn't include the seqnum we're expecting to
   115  		// consume now, we're missing a segment. We cannot proceed until
   116  		// we receive that segment though.
   117  		if !r.rcvNxt.InWindow(segSeq, segLen) {
   118  			return false
   119  		}
   120  
   121  		// Trim segment to eliminate already acknowledged data.
   122  		if segSeq.LessThan(r.rcvNxt) {
   123  			diff := segSeq.Size(r.rcvNxt)
   124  			segLen -= diff
   125  			segSeq.UpdateForward(diff)
   126  			s.sequenceNumber.UpdateForward(diff)
   127  			s.data.TrimFront(int(diff))
   128  		}
   129  
   130  		// Move segment to ready-to-deliver list. Wakeup any waiters.
   131  		r.ep.readyToRead(s)
   132  
   133  	} else if segSeq != r.rcvNxt {
   134  		return false
   135  	}
   136  
   137  	// Update the segment that we're expecting to consume.
   138  	r.rcvNxt = segSeq.Add(segLen)
   139  
   140  	// In cases of a misbehaving sender which could send more than the
   141  	// advertised window, we could end up in a situation where we get a
   142  	// segment that exceeds the window advertised. Instead of partially
   143  	// accepting the segment and discarding bytes beyond the advertised
   144  	// window, we accept the whole segment and make sure r.rcvAcc is moved
   145  	// forward to match r.rcvNxt to indicate that the window is now closed.
   146  	//
   147  	// In absence of this check the r.acceptable() check fails and accepts
   148  	// segments that should be dropped because rcvWnd is calculated as
   149  	// the size of the interval (rcvNxt, rcvAcc] which becomes extremely
   150  	// large if rcvAcc is ever less than rcvNxt.
   151  	if r.rcvAcc.LessThan(r.rcvNxt) {
   152  		r.rcvAcc = r.rcvNxt
   153  	}
   154  
   155  	// Trim SACK Blocks to remove any SACK information that covers
   156  	// sequence numbers that have been consumed.
   157  	TrimSACKBlockList(&r.ep.sack, r.rcvNxt)
   158  
   159  	// Handle FIN or FIN-ACK.
   160  	if s.flagIsSet(header.TCPFlagFin) {
   161  		r.rcvNxt++
   162  
   163  		// Send ACK immediately.
   164  		r.ep.snd.sendAck()
   165  
   166  		// Tell any readers that no more data will come.
   167  		r.closed = true
   168  		r.ep.readyToRead(nil)
   169  
   170  		// We just received a FIN, our next state depends on whether we sent a
   171  		// FIN already or not.
   172  		r.ep.mu.Lock()
   173  		switch r.ep.state {
   174  		case StateEstablished:
   175  			r.ep.state = StateCloseWait
   176  		case StateFinWait1:
   177  			if s.flagIsSet(header.TCPFlagAck) {
   178  				// FIN-ACK, transition to TIME-WAIT.
   179  				r.ep.state = StateTimeWait
   180  			} else {
   181  				// Simultaneous close, expecting a final ACK.
   182  				r.ep.state = StateClosing
   183  			}
   184  		case StateFinWait2:
   185  			r.ep.state = StateTimeWait
   186  		}
   187  		r.ep.mu.Unlock()
   188  
   189  		// Flush out any pending segments, except the very first one if
   190  		// it happens to be the one we're handling now because the
   191  		// caller is using it.
   192  		first := 0
   193  		if len(r.pendingRcvdSegments) != 0 && r.pendingRcvdSegments[0] == s {
   194  			first = 1
   195  		}
   196  
   197  		for i := first; i < len(r.pendingRcvdSegments); i++ {
   198  			r.pendingRcvdSegments[i].decRef()
   199  		}
   200  		r.pendingRcvdSegments = r.pendingRcvdSegments[:first]
   201  
   202  		return true
   203  	}
   204  
   205  	// Handle ACK (not FIN-ACK, which we handled above) during one of the
   206  	// shutdown states.
   207  	if s.flagIsSet(header.TCPFlagAck) {
   208  		r.ep.mu.Lock()
   209  		switch r.ep.state {
   210  		case StateFinWait1:
   211  			r.ep.state = StateFinWait2
   212  		case StateClosing:
   213  			r.ep.state = StateTimeWait
   214  		case StateLastAck:
   215  			r.ep.state = StateClose
   216  		}
   217  		r.ep.mu.Unlock()
   218  	}
   219  
   220  	return true
   221  }
   222  
   223  // updateRTT updates the receiver RTT measurement based on the sequence number
   224  // of the received segment.
   225  func (r *receiver) updateRTT() {
   226  	// From: https://public.lanl.gov/radiant/pubs/drs/sc2001-poster.pdf
   227  	//
   228  	// A system that is only transmitting acknowledgements can still
   229  	// estimate the round-trip time by observing the time between when a byte
   230  	// is first acknowledged and the receipt of data that is at least one
   231  	// window beyond the sequence number that was acknowledged.
   232  	r.ep.rcvListMu.Lock()
   233  	if r.ep.rcvAutoParams.rttMeasureTime.IsZero() {
   234  		// New measurement.
   235  		r.ep.rcvAutoParams.rttMeasureTime = time.Now()
   236  		r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd)
   237  		r.ep.rcvListMu.Unlock()
   238  		return
   239  	}
   240  	if r.rcvNxt.LessThan(r.ep.rcvAutoParams.rttMeasureSeqNumber) {
   241  		r.ep.rcvListMu.Unlock()
   242  		return
   243  	}
   244  	rtt := time.Since(r.ep.rcvAutoParams.rttMeasureTime)
   245  	// We only store the minimum observed RTT here as this is only used in
   246  	// absence of a SRTT available from either timestamps or a sender
   247  	// measurement of RTT.
   248  	if r.ep.rcvAutoParams.rtt == 0 || rtt < r.ep.rcvAutoParams.rtt {
   249  		r.ep.rcvAutoParams.rtt = rtt
   250  	}
   251  	r.ep.rcvAutoParams.rttMeasureTime = time.Now()
   252  	r.ep.rcvAutoParams.rttMeasureSeqNumber = r.rcvNxt.Add(r.rcvWnd)
   253  	r.ep.rcvListMu.Unlock()
   254  }
   255  
   256  // handleRcvdSegment handles TCP segments directed at the connection managed by
   257  // r as they arrive. It is called by the protocol main loop.
   258  func (r *receiver) handleRcvdSegment(s *segment) {
   259  	// We don't care about receive processing anymore if the receive side
   260  	// is closed.
   261  	if r.closed {
   262  		return
   263  	}
   264  
   265  	segLen := seqnum.Size(s.data.Size())
   266  	segSeq := s.sequenceNumber
   267  
   268  	// If the sequence number range is outside the acceptable range, just
   269  	// send an ACK. This is according to RFC 793, page 37.
   270  	if !r.acceptable(segSeq, segLen) {
   271  		r.ep.snd.sendAck()
   272  		return
   273  	}
   274  
   275  	// Defer segment processing if it can't be consumed now.
   276  	if !r.consumeSegment(s, segSeq, segLen) {
   277  		if segLen > 0 || s.flagIsSet(header.TCPFlagFin) {
   278  			// We only store the segment if it's within our buffer
   279  			// size limit.
   280  			if r.pendingBufUsed < r.pendingBufSize {
   281  				r.pendingBufUsed += s.logicalLen()
   282  				s.incRef()
   283  				heap.Push(&r.pendingRcvdSegments, s)
   284  				UpdateSACKBlocks(&r.ep.sack, segSeq, segSeq.Add(segLen), r.rcvNxt)
   285  			}
   286  
   287  			// Immediately send an ack so that the peer knows it may
   288  			// have to retransmit.
   289  			r.ep.snd.sendAck()
   290  		}
   291  		return
   292  	}
   293  
   294  	// Since we consumed a segment update the receiver's RTT estimate
   295  	// if required.
   296  	if segLen > 0 {
   297  		r.updateRTT()
   298  	}
   299  
   300  	// By consuming the current segment, we may have filled a gap in the
   301  	// sequence number domain that allows pending segments to be consumed
   302  	// now. So try to do it.
   303  	for !r.closed && r.pendingRcvdSegments.Len() > 0 {
   304  		s := r.pendingRcvdSegments[0]
   305  		segLen := seqnum.Size(s.data.Size())
   306  		segSeq := s.sequenceNumber
   307  
   308  		// Skip segment altogether if it has already been acknowledged.
   309  		if !segSeq.Add(segLen-1).LessThan(r.rcvNxt) &&
   310  			!r.consumeSegment(s, segSeq, segLen) {
   311  			break
   312  		}
   313  
   314  		heap.Pop(&r.pendingRcvdSegments)
   315  		r.pendingBufUsed -= s.logicalLen()
   316  		s.decRef()
   317  	}
   318  }