inet.af/netstack@v0.0.0-20220214151720-7585b01ddccf/tcpip/transport/tcp/rcv.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tcp
    16  
    17  import (
    18  	"container/heap"
    19  	"math"
    20  
    21  	"inet.af/netstack/tcpip"
    22  	"inet.af/netstack/tcpip/header"
    23  	"inet.af/netstack/tcpip/seqnum"
    24  	"inet.af/netstack/tcpip/stack"
    25  )
    26  
// receiver holds the state necessary to receive TCP segments and turn them
// into a stream of bytes.
//
// +stateify savable
type receiver struct {
	// TCPReceiverState is embedded; this file reads and writes its RcvNxt,
	// RcvAcc, RcvWndScale and PendingBufUsed fields.
	stack.TCPReceiverState
	// ep is the endpoint this receiver delivers data to and sends ACKs
	// through.
	ep *endpoint

	// rcvWnd is the non-scaled receive window last advertised to the peer.
	rcvWnd seqnum.Size

	// rcvWUP is the RcvNxt value at the last window update sent.
	rcvWUP seqnum.Value

	// prevBufUsed is the snapshot of endpoint rcvBufUsed taken when we
	// advertise a receive window.
	prevBufUsed int

	// closed is set once a FIN has been consumed; no more data will be
	// delivered to readers after that.
	closed bool

	// pendingRcvdSegments is bounded by the receive buffer size of the
	// endpoint. It holds out-of-order segments that could not be consumed
	// when they arrived.
	pendingRcvdSegments segmentHeap

	// Time when the last ack was received. It is refreshed by
	// handleRcvdSegment for every segment that passes the acceptability and
	// closing-state checks.
	lastRcvdAckTime tcpip.MonotonicTime
}
    54  
    55  func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale uint8) *receiver {
    56  	return &receiver{
    57  		ep: ep,
    58  		TCPReceiverState: stack.TCPReceiverState{
    59  			RcvNxt:      irs + 1,
    60  			RcvAcc:      irs.Add(rcvWnd + 1),
    61  			RcvWndScale: rcvWndScale,
    62  		},
    63  		rcvWnd:          rcvWnd,
    64  		rcvWUP:          irs + 1,
    65  		lastRcvdAckTime: ep.stack.Clock().NowMonotonic(),
    66  	}
    67  }
    68  
    69  // acceptable checks if the segment sequence number range is acceptable
    70  // according to the table on page 26 of RFC 793.
    71  func (r *receiver) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool {
    72  	// r.rcvWnd could be much larger than the window size we advertised in our
    73  	// outgoing packets, we should use what we have advertised for acceptability
    74  	// test.
    75  	scaledWindowSize := r.rcvWnd >> r.RcvWndScale
    76  	if scaledWindowSize > math.MaxUint16 {
    77  		// This is what we actually put in the Window field.
    78  		scaledWindowSize = math.MaxUint16
    79  	}
    80  	advertisedWindowSize := scaledWindowSize << r.RcvWndScale
    81  	return header.Acceptable(segSeq, segLen, r.RcvNxt, r.RcvNxt.Add(advertisedWindowSize))
    82  }
    83  
    84  // currentWindow returns the available space in the window that was advertised
    85  // last to our peer.
    86  func (r *receiver) currentWindow() (curWnd seqnum.Size) {
    87  	endOfWnd := r.rcvWUP.Add(r.rcvWnd)
    88  	if endOfWnd.LessThan(r.RcvNxt) {
    89  		// return 0 if r.RcvNxt is past the end of the previously advertised window.
    90  		// This can happen because we accept a large segment completely even if
    91  		// accepting it causes it to partially exceed the advertised window.
    92  		return 0
    93  	}
    94  	return r.RcvNxt.Size(endOfWnd)
    95  }
    96  
    97  // getSendParams returns the parameters needed by the sender when building
    98  // segments to send.
    99  func (r *receiver) getSendParams() (RcvNxt seqnum.Value, rcvWnd seqnum.Size) {
   100  	newWnd := r.ep.selectWindow()
   101  	curWnd := r.currentWindow()
   102  	unackLen := int(r.ep.snd.MaxSentAck.Size(r.RcvNxt))
   103  	bufUsed := r.ep.receiveBufferUsed()
   104  
   105  	// Grow the right edge of the window only for payloads larger than the
   106  	// the segment overhead OR if the application is actively consuming data.
   107  	//
   108  	// Avoiding growing the right edge otherwise, addresses a situation below:
   109  	// An application has been slow in reading data and we have burst of
   110  	// incoming segments lengths < segment overhead. Here, our available free
   111  	// memory would reduce drastically when compared to the advertised receive
   112  	// window.
   113  	//
   114  	// For example: With incoming 512 bytes segments, segment overhead of
   115  	// 552 bytes (at the time of writing this comment), with receive window
   116  	// starting from 1MB and with rcvAdvWndScale being 1, buffer would reach 0
   117  	// when the curWnd is still 19436 bytes, because for every incoming segment
   118  	// newWnd would reduce by (552+512) >> rcvAdvWndScale (current value 1),
   119  	// while curWnd would reduce by 512 bytes.
   120  	// Such a situation causes us to keep tail dropping the incoming segments
   121  	// and never advertise zero receive window to the peer.
   122  	//
   123  	// Linux does a similar check for minimal sk_buff size (128):
   124  	// https://github.com/torvalds/linux/blob/d5beb3140f91b1c8a3d41b14d729aefa4dcc58bc/net/ipv4/tcp_input.c#L783
   125  	//
   126  	// Also, if the application is reading the data, we keep growing the right
   127  	// edge, as we are still advertising a window that we think can be serviced.
   128  	toGrow := unackLen >= SegSize || bufUsed <= r.prevBufUsed
   129  
   130  	// Update RcvAcc only if new window is > previously advertised window. We
   131  	// should never shrink the acceptable sequence space once it has been
   132  	// advertised the peer. If we shrink the acceptable sequence space then we
   133  	// would end up dropping bytes that might already be in flight.
   134  	// ====================================================  sequence space.
   135  	// ^             ^               ^                   ^
   136  	// rcvWUP       RcvNxt         RcvAcc          new RcvAcc
   137  	//               <=====curWnd ===>
   138  	//               <========= newWnd > curWnd ========= >
   139  	if r.RcvNxt.Add(curWnd).LessThan(r.RcvNxt.Add(newWnd)) && toGrow {
   140  		// If the new window moves the right edge, then update RcvAcc.
   141  		r.RcvAcc = r.RcvNxt.Add(newWnd)
   142  	} else {
   143  		if newWnd == 0 {
   144  			// newWnd is zero but we can't advertise a zero as it would cause window
   145  			// to shrink so just increment a metric to record this event.
   146  			r.ep.stats.ReceiveErrors.WantZeroRcvWindow.Increment()
   147  		}
   148  		newWnd = curWnd
   149  	}
   150  
   151  	// Apply silly-window avoidance when recovering from zero-window situation.
   152  	// Keep advertising zero receive window up until the new window reaches a
   153  	// threshold.
   154  	if r.rcvWnd == 0 && newWnd != 0 {
   155  		r.ep.rcvQueueInfo.rcvQueueMu.Lock()
   156  		if crossed, above := r.ep.windowCrossedACKThresholdLocked(int(newWnd), int(r.ep.ops.GetReceiveBufferSize())); !crossed && !above {
   157  			newWnd = 0
   158  		}
   159  		r.ep.rcvQueueInfo.rcvQueueMu.Unlock()
   160  	}
   161  
   162  	// Stash away the non-scaled receive window as we use it for measuring
   163  	// receiver's estimated RTT.
   164  	r.rcvWnd = newWnd
   165  	r.rcvWUP = r.RcvNxt
   166  	r.prevBufUsed = bufUsed
   167  	scaledWnd := r.rcvWnd >> r.RcvWndScale
   168  	if scaledWnd == 0 {
   169  		// Increment a metric if we are advertising an actual zero window.
   170  		r.ep.stats.ReceiveErrors.ZeroRcvWindowState.Increment()
   171  	}
   172  
   173  	// If we started off with a window larger than what can he held in
   174  	// the 16bit window field, we ceil the value to the max value.
   175  	if scaledWnd > math.MaxUint16 {
   176  		scaledWnd = seqnum.Size(math.MaxUint16)
   177  
   178  		// Ensure that the stashed receive window always reflects what
   179  		// is being advertised.
   180  		r.rcvWnd = scaledWnd << r.RcvWndScale
   181  	}
   182  	return r.RcvNxt, scaledWnd
   183  }
   184  
// nonZeroWindow is called when the receive window grows from zero to nonzero;
// in such cases we may need to send an ack to indicate to our peer that it can
// resume sending data.
//
// It unconditionally asks the endpoint's sender to emit an ACK.
func (r *receiver) nonZeroWindow() {
	// Immediately send an ack.
	r.ep.snd.sendAck()
}
   192  
// consumeSegment attempts to consume a segment that was received by r. The
// segment may have just been received or may have been received earlier but
// wasn't ready to be consumed then.
//
// segSeq and segLen are the sequence number and payload length of s as seen
// by the caller. If part of the payload is already acknowledged, s itself is
// trimmed in place (sequence number and data).
//
// On success RcvNxt is advanced (and RcvAcc if needed), SACK blocks are
// trimmed, and FIN / final-ACK handling may change the endpoint state.
//
// Returns true if the segment was consumed, false if it cannot be consumed
// yet because of a missing segment.
func (r *receiver) consumeSegment(s *segment, segSeq seqnum.Value, segLen seqnum.Size) bool {
	if segLen > 0 {
		// If the segment doesn't include the seqnum we're expecting to
		// consume now, we're missing a segment. We cannot proceed until
		// we receive that segment though.
		if !r.RcvNxt.InWindow(segSeq, segLen) {
			return false
		}

		// Trim segment to eliminate already acknowledged data.
		if segSeq.LessThan(r.RcvNxt) {
			diff := segSeq.Size(r.RcvNxt)
			segLen -= diff
			segSeq.UpdateForward(diff)
			s.sequenceNumber.UpdateForward(diff)
			s.data.TrimFront(int(diff))
		}

		// Move segment to ready-to-deliver list. Wakeup any waiters.
		r.ep.readyToRead(s)

	} else if segSeq != r.RcvNxt {
		// A zero-length segment is only consumed when it sits exactly at
		// RcvNxt.
		return false
	}

	// Update the segment that we're expecting to consume.
	r.RcvNxt = segSeq.Add(segLen)

	// In cases of a misbehaving sender which could send more than the
	// advertised window, we could end up in a situation where we get a
	// segment that exceeds the window advertised. Instead of partially
	// accepting the segment and discarding bytes beyond the advertised
	// window, we accept the whole segment and make sure r.RcvAcc is moved
	// forward to match r.RcvNxt to indicate that the window is now closed.
	//
	// In absence of this check the r.acceptable() check fails and accepts
	// segments that should be dropped because rcvWnd is calculated as
	// the size of the interval (RcvNxt, RcvAcc] which becomes extremely
	// large if RcvAcc is ever less than RcvNxt.
	if r.RcvAcc.LessThan(r.RcvNxt) {
		r.RcvAcc = r.RcvNxt
	}

	// Trim SACK Blocks to remove any SACK information that covers
	// sequence numbers that have been consumed.
	TrimSACKBlockList(&r.ep.sack, r.RcvNxt)

	// Handle FIN or FIN-ACK.
	if s.flags.Contains(header.TCPFlagFin) {
		// The FIN occupies one sequence number.
		r.RcvNxt++

		// Send ACK immediately.
		r.ep.snd.sendAck()

		// Tell any readers that no more data will come.
		r.closed = true
		r.ep.readyToRead(nil)

		// We just received a FIN, our next state depends on whether we sent a
		// FIN already or not.
		switch r.ep.EndpointState() {
		case StateEstablished:
			r.ep.setEndpointState(StateCloseWait)
		case StateFinWait1:
			if s.flags.Contains(header.TCPFlagAck) && s.ackNumber == r.ep.snd.SndNxt {
				// FIN-ACK, transition to TIME-WAIT.
				r.ep.setEndpointState(StateTimeWait)
			} else {
				// Simultaneous close, expecting a final ACK.
				r.ep.setEndpointState(StateClosing)
			}
		case StateFinWait2:
			r.ep.setEndpointState(StateTimeWait)
		}

		// Flush out any pending segments, except the very first one if
		// it happens to be the one we're handling now because the
		// caller is using it.
		first := 0
		if len(r.pendingRcvdSegments) != 0 && r.pendingRcvdSegments[0] == s {
			first = 1
		}

		// NOTE(review): PendingBufUsed is adjusted here without holding
		// rcvQueueMu, unlike in handleRcvdSegment — confirm this is
		// intentional.
		for i := first; i < len(r.pendingRcvdSegments); i++ {
			r.PendingBufUsed -= r.pendingRcvdSegments[i].segMemSize()
			r.pendingRcvdSegments[i].decRef()

			// Note that slice truncation does not allow garbage collection of
			// truncated items, thus truncated items must be set to nil to avoid
			// memory leaks.
			r.pendingRcvdSegments[i] = nil
		}
		r.pendingRcvdSegments = r.pendingRcvdSegments[:first]

		return true
	}

	// Handle ACK (not FIN-ACK, which we handled above) during one of the
	// shutdown states.
	if s.flags.Contains(header.TCPFlagAck) && s.ackNumber == r.ep.snd.SndNxt {
		switch r.ep.EndpointState() {
		case StateFinWait1:
			r.ep.setEndpointState(StateFinWait2)
			// Notify protocol goroutine that we have received an
			// ACK to our FIN so that it can start the FIN_WAIT2
			// timer to abort connection if the other side does
			// not close within 2MSL.
			r.ep.notifyProtocolGoroutine(notifyClose)
		case StateClosing:
			r.ep.setEndpointState(StateTimeWait)
		case StateLastAck:
			r.ep.transitionToStateCloseLocked()
		}
	}

	return true
}
   316  
   317  // updateRTT updates the receiver RTT measurement based on the sequence number
   318  // of the received segment.
   319  func (r *receiver) updateRTT() {
   320  	// From: https://public.lanl.gov/radiant/pubs/drs/sc2001-poster.pdf
   321  	//
   322  	// A system that is only transmitting acknowledgements can still
   323  	// estimate the round-trip time by observing the time between when a byte
   324  	// is first acknowledged and the receipt of data that is at least one
   325  	// window beyond the sequence number that was acknowledged.
   326  	r.ep.rcvQueueInfo.rcvQueueMu.Lock()
   327  	if r.ep.rcvQueueInfo.RcvAutoParams.RTTMeasureTime == (tcpip.MonotonicTime{}) {
   328  		// New measurement.
   329  		r.ep.rcvQueueInfo.RcvAutoParams.RTTMeasureTime = r.ep.stack.Clock().NowMonotonic()
   330  		r.ep.rcvQueueInfo.RcvAutoParams.RTTMeasureSeqNumber = r.RcvNxt.Add(r.rcvWnd)
   331  		r.ep.rcvQueueInfo.rcvQueueMu.Unlock()
   332  		return
   333  	}
   334  	if r.RcvNxt.LessThan(r.ep.rcvQueueInfo.RcvAutoParams.RTTMeasureSeqNumber) {
   335  		r.ep.rcvQueueInfo.rcvQueueMu.Unlock()
   336  		return
   337  	}
   338  	rtt := r.ep.stack.Clock().NowMonotonic().Sub(r.ep.rcvQueueInfo.RcvAutoParams.RTTMeasureTime)
   339  	// We only store the minimum observed RTT here as this is only used in
   340  	// absence of a SRTT available from either timestamps or a sender
   341  	// measurement of RTT.
   342  	if r.ep.rcvQueueInfo.RcvAutoParams.RTT == 0 || rtt < r.ep.rcvQueueInfo.RcvAutoParams.RTT {
   343  		r.ep.rcvQueueInfo.RcvAutoParams.RTT = rtt
   344  	}
   345  	r.ep.rcvQueueInfo.RcvAutoParams.RTTMeasureTime = r.ep.stack.Clock().NowMonotonic()
   346  	r.ep.rcvQueueInfo.RcvAutoParams.RTTMeasureSeqNumber = r.RcvNxt.Add(r.rcvWnd)
   347  	r.ep.rcvQueueInfo.rcvQueueMu.Unlock()
   348  }
   349  
// handleRcvdSegmentClosing performs the extra checks needed for a segment
// that arrives while the endpoint is not in the ESTABLISHED state.
//
// state is the endpoint state sampled by the caller and closed is the
// endpoint's closed flag (handleRcvdSegment passes r.ep.closed).
//
// It returns drop=true when the segment must not be processed further. err is
// a *tcpip.ErrConnectionAborted when data arrives although the receive side
// is closed, or when a fully closed socket in FIN-WAIT-2 receives anything
// other than the expected FIN. Otherwise err is nil.
func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, closed bool) (drop bool, err tcpip.Error) {
	// The receive side counts as closed if either the endpoint's queue was
	// closed for reads or a FIN has already been consumed.
	r.ep.rcvQueueInfo.rcvQueueMu.Lock()
	rcvClosed := r.ep.rcvQueueInfo.RcvClosed || r.closed
	r.ep.rcvQueueInfo.rcvQueueMu.Unlock()

	// If we are in one of the shutdown states then we need to do
	// additional checks before we try and process the segment.
	switch state {
	case StateCloseWait, StateClosing, StateLastAck:
		if !s.sequenceNumber.LessThanEq(r.RcvNxt) {
			// Just drop the segment as we have
			// already received a FIN and this
			// segment is after the sequence number
			// for the FIN.
			return true, nil
		}
		fallthrough
	case StateFinWait1, StateFinWait2:
		// If the ACK acks something not yet sent then we send an ACK.
		//
		// RFC793, page 37: If the connection is in a synchronized state,
		// (ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK,
		// TIME-WAIT), any unacceptable segment (out of window sequence number
		// or unacceptable acknowledgment number) must elicit only an empty
		// acknowledgment segment containing the current send-sequence number
		// and an acknowledgment indicating the next sequence number expected
		// to be received, and the connection remains in the same state.
		//
		// Just as on Linux, we do not apply this behavior when state is
		// ESTABLISHED.
		// Linux receive processing for all states except ESTABLISHED and
		// TIME_WAIT is here where if the ACK check fails, we attempt to
		// reply back with an ACK with correct seq/ack numbers.
		// https://github.com/torvalds/linux/blob/v5.8/net/ipv4/tcp_input.c#L6186
		// The ESTABLISHED state processing is here where if the ACK check
		// fails, we ignore the packet:
		// https://github.com/torvalds/linux/blob/v5.8/net/ipv4/tcp_input.c#L5591
		if r.ep.snd.SndNxt.LessThan(s.ackNumber) {
			r.ep.snd.maybeSendOutOfWindowAck(s)
			return true, nil
		}

		// If we are closed for reads (either due to an
		// incoming FIN or the user calling shutdown(..,
		// SHUT_RD)) then any data past the RcvNxt should
		// trigger a RST.
		endDataSeq := s.sequenceNumber.Add(seqnum.Size(s.data.Size()))
		if state != StateCloseWait && rcvClosed && r.RcvNxt.LessThan(endDataSeq) {
			return true, &tcpip.ErrConnectionAborted{}
		}
		// No further closing-state checks apply in FIN-WAIT-1.
		if state == StateFinWait1 {
			break
		}

		// If it's a retransmission of an old data segment
		// or a pure ACK then allow it.
		if s.sequenceNumber.Add(s.logicalLen()).LessThanEq(r.RcvNxt) ||
			s.logicalLen() == 0 {
			break
		}

		// In FIN-WAIT2 if the socket is fully
		// closed (not owned by application on our end)
		// then the only acceptable segment is a
		// FIN. Since FIN can technically also carry
		// data we verify that the segment carrying a
		// FIN ends at exactly r.RcvNxt+1.
		//
		// From RFC793 page 25.
		//
		// For sequence number purposes, the SYN is
		// considered to occur before the first actual
		// data octet of the segment in which it occurs,
		// while the FIN is considered to occur after
		// the last actual data octet in a segment in
		// which it occurs.
		if closed && (!s.flags.Contains(header.TCPFlagFin) || s.sequenceNumber.Add(s.logicalLen()) != r.RcvNxt+1) {
			return true, &tcpip.ErrConnectionAborted{}
		}
	}

	// We don't care about receive processing anymore if the receive side
	// is closed.
	//
	// NOTE: We still want to permit a FIN as it's possible only our
	// end has closed and the peer is yet to send a FIN. Hence we
	// compare only the payload.
	segEnd := s.sequenceNumber.Add(seqnum.Size(s.data.Size()))
	if rcvClosed && !segEnd.LessThanEq(r.RcvNxt) {
		return true, nil
	}
	return false, nil
}
   443  
// handleRcvdSegment handles TCP segments directed at the connection managed by
// r as they arrive. It is called by the protocol main loop.
//
// It returns drop=true when the segment was rejected (out of window, or
// refused by handleRcvdSegmentClosing), together with any error from
// handleRcvdSegmentClosing. A segment that is merely out of order is queued
// (subject to the buffer limit below) and reported as (false, nil).
func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err tcpip.Error) {
	state := r.ep.EndpointState()
	closed := r.ep.closed

	segLen := seqnum.Size(s.data.Size())
	segSeq := s.sequenceNumber

	// If the sequence number range is outside the acceptable range, just
	// send an ACK and stop further processing of the segment.
	// This is according to RFC 793, page 68.
	if !r.acceptable(segSeq, segLen) {
		r.ep.snd.maybeSendOutOfWindowAck(s)
		return true, nil
	}

	if state != StateEstablished {
		drop, err := r.handleRcvdSegmentClosing(s, state, closed)
		if drop || err != nil {
			return drop, err
		}
	}

	// Store the time of the last ack.
	r.lastRcvdAckTime = r.ep.stack.Clock().NowMonotonic()

	// Defer segment processing if it can't be consumed now.
	if !r.consumeSegment(s, segSeq, segLen) {
		if segLen > 0 || s.flags.Contains(header.TCPFlagFin) {
			// We only store the segment if it's within our buffer size limit.
			//
			// Out-of-order segments are queued only while PendingBufUsed plus
			// this payload stays below rcvBufSize>>2, i.e. a quarter of the
			// receive buffer. This ensures that we always leave some space
			// for the inorder segments to arrive allowing pending segments to
			// be processed and delivered to the user.
			//
			// NOTE(review): this comment previously said 75% of the receive
			// buffer, but the code checks against a quarter — confirm which
			// is intended.
			if rcvBufSize := r.ep.ops.GetReceiveBufferSize(); rcvBufSize > 0 && (r.PendingBufUsed+int(segLen)) < int(rcvBufSize)>>2 {
				r.ep.rcvQueueInfo.rcvQueueMu.Lock()
				r.PendingBufUsed += s.segMemSize()
				r.ep.rcvQueueInfo.rcvQueueMu.Unlock()
				// The heap holds its own reference to the segment.
				s.incRef()
				heap.Push(&r.pendingRcvdSegments, s)
				UpdateSACKBlocks(&r.ep.sack, segSeq, segSeq.Add(segLen), r.RcvNxt)
			}

			// Immediately send an ack so that the peer knows it may
			// have to retransmit.
			r.ep.snd.sendAck()
		}
		return false, nil
	}

	// Since we consumed a segment update the receiver's RTT estimate
	// if required.
	if segLen > 0 {
		r.updateRTT()
	}

	// By consuming the current segment, we may have filled a gap in the
	// sequence number domain that allows pending segments to be consumed
	// now. So try to do it.
	for !r.closed && r.pendingRcvdSegments.Len() > 0 {
		s := r.pendingRcvdSegments[0]
		segLen := seqnum.Size(s.data.Size())
		segSeq := s.sequenceNumber

		// Skip segment altogether if it has already been acknowledged.
		// Otherwise stop at the first pending segment that still cannot be
		// consumed.
		if !segSeq.Add(segLen-1).LessThan(r.RcvNxt) &&
			!r.consumeSegment(s, segSeq, segLen) {
			break
		}

		// The segment was either consumed or is stale: remove it from the
		// heap and release the reference taken when it was queued.
		heap.Pop(&r.pendingRcvdSegments)
		r.ep.rcvQueueInfo.rcvQueueMu.Lock()
		r.PendingBufUsed -= s.segMemSize()
		r.ep.rcvQueueInfo.rcvQueueMu.Unlock()
		s.decRef()
	}
	return false, nil
}
   524  
   525  // handleTimeWaitSegment handles inbound segments received when the endpoint
   526  // has entered the TIME_WAIT state.
   527  func (r *receiver) handleTimeWaitSegment(s *segment) (resetTimeWait bool, newSyn bool) {
   528  	segSeq := s.sequenceNumber
   529  	segLen := seqnum.Size(s.data.Size())
   530  
   531  	// Just silently drop any RST packets in TIME_WAIT. We do not support
   532  	// TIME_WAIT assasination as a result we confirm w/ fix 1 as described
   533  	// in https://tools.ietf.org/html/rfc1337#section-3.
   534  	//
   535  	// This behavior overrides RFC793 page 70 where we transition to CLOSED
   536  	// on receiving RST, which is also default Linux behavior.
   537  	// On Linux the RST can be ignored by setting sysctl net.ipv4.tcp_rfc1337.
   538  	//
   539  	// As we do not yet support PAWS, we are being conservative in ignoring
   540  	// RSTs by default.
   541  	if s.flags.Contains(header.TCPFlagRst) {
   542  		return false, false
   543  	}
   544  
   545  	// If it's a SYN and the sequence number is higher than any seen before
   546  	// for this connection then try and redirect it to a listening endpoint
   547  	// if available.
   548  	//
   549  	// RFC 1122:
   550  	//   "When a connection is [...] on TIME-WAIT state [...]
   551  	//   [a TCP] MAY accept a new SYN from the remote TCP to
   552  	//   reopen the connection directly, if it:
   553  
   554  	//    (1) assigns its initial sequence number for the new
   555  	//     connection to be larger than the largest sequence
   556  	//     number it used on the previous connection incarnation,
   557  	//     and
   558  
   559  	//    (2) returns to TIME-WAIT state if the SYN turns out
   560  	//      to be an old duplicate".
   561  	if s.flags.Contains(header.TCPFlagSyn) && r.RcvNxt.LessThan(segSeq) {
   562  		return false, true
   563  	}
   564  
   565  	// Drop the segment if it does not contain an ACK.
   566  	if !s.flags.Contains(header.TCPFlagAck) {
   567  		return false, false
   568  	}
   569  
   570  	// Update Timestamp if required. See RFC7323, section-4.3.
   571  	if r.ep.SendTSOk && s.parsedOptions.TS {
   572  		r.ep.updateRecentTimestamp(s.parsedOptions.TSVal, r.ep.snd.MaxSentAck, segSeq)
   573  	}
   574  
   575  	if segSeq.Add(1) == r.RcvNxt && s.flags.Contains(header.TCPFlagFin) {
   576  		// If it's a FIN-ACK then resetTimeWait and send an ACK, as it
   577  		// indicates our final ACK could have been lost.
   578  		r.ep.snd.sendAck()
   579  		return true, false
   580  	}
   581  
   582  	// If the sequence number range is outside the acceptable range or
   583  	// carries data then just send an ACK. This is according to RFC 793,
   584  	// page 37.
   585  	//
   586  	// NOTE: In TIME_WAIT the only acceptable sequence number is RcvNxt.
   587  	if segSeq != r.RcvNxt || segLen != 0 {
   588  		r.ep.snd.sendAck()
   589  	}
   590  	return false, false
   591  }