github.com/vpnishe/netstack@v1.10.6/tcpip/transport/tcp/snd.go

github.com/vpnishe/netstack@v1.10.6/tcpip/transport/tcp/snd.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tcp
    16  
    17  import (
    18  	"math"
    19  	"sync"
    20  	"sync/atomic"
    21  	"time"
    22  
    23  	"github.com/vpnishe/netstack/sleep"
    24  	"github.com/vpnishe/netstack/tcpip"
    25  	"github.com/vpnishe/netstack/tcpip/buffer"
    26  	"github.com/vpnishe/netstack/tcpip/header"
    27  	"github.com/vpnishe/netstack/tcpip/seqnum"
    28  )
    29  
const (
	// minRTO is the lower bound applied to the retransmit timeout (RTO)
	// every time updateRTO recomputes it.
	minRTO = 200 * time.Millisecond

	// InitialCwnd is the initial congestion window, in packets. sendData
	// also clamps the window back down to this value after the connection
	// has been idle for longer than one RTO.
	InitialCwnd = 10

	// nDupAckThreshold is the number of duplicate ACKs required before
	// fast retransmit is entered.
	nDupAckThreshold = 3
)
    41  
// ccState identifies the congestion control state a sender is currently in.
// The possible values are declared in the constant block that follows.
type ccState int
    44  
const (
	// Open indicates that the sender is receiving ACKs in order and that
	// no loss, duplicate ACKs, etc. have been detected. It is the zero
	// value of ccState.
	Open ccState = iota
	// RTORecovery indicates that a retransmit timeout (RTO) has occurred
	// and the sender has entered an RTO based recovery phase.
	RTORecovery
	// FastRecovery indicates that the sender has entered fast recovery
	// after receiving nDupAckThreshold duplicate ACKs. This state is
	// entered only when SACK is not in use.
	FastRecovery
	// SACKRecovery indicates that the sender has entered SACK based
	// recovery.
	SACKRecovery
	// Disorder indicates that the sender has received either some SACK
	// blocks or duplicate ACKs.
	Disorder
)
    63  
// congestionControl is an interface that must be implemented by any supported
// congestion control algorithm. The sender invokes these hooks at the
// corresponding points of its loss detection and ACK processing.
type congestionControl interface {
	// HandleNDupAcks is invoked when sender.dupAckCount >= nDupAckThreshold
	// just before entering fast retransmit.
	HandleNDupAcks()

	// HandleRTOExpired is invoked when the retransmit timer expires.
	HandleRTOExpired()

	// Update is invoked when processing inbound ACKs. It is passed the
	// number of packets that were acknowledged by the most recent
	// cumulative acknowledgement.
	Update(packetsAcked int)

	// PostRecovery is invoked when the sender is exiting a fast retransmit/
	// recovery phase. This provides congestion control algorithms a way
	// to adjust their state when exiting recovery.
	PostRecovery()
}
    84  
// sender holds the state necessary to send TCP segments: the send window and
// sequence-number bookkeeping, the queue of segments to (re)transmit, the
// retransmission timer and RTT estimate, and the congestion control state.
//
// +stateify savable
type sender struct {
	// ep is the endpoint this sender belongs to.
	ep *endpoint

	// lastSendTime is the timestamp when the last packet was sent. It is
	// initialized to the creation time of the sender.
	lastSendTime time.Time

	// dupAckCount is the number of duplicated acks received. It is used for
	// fast retransmit.
	dupAckCount int

	// fr holds state related to fast recovery.
	fr fastRecovery

	// sndCwnd is the congestion window, in packets.
	sndCwnd int

	// sndSsthresh is the threshold between slow start and congestion
	// avoidance.
	sndSsthresh int

	// sndCAAckCount is the number of packets acknowledged during congestion
	// avoidance. When enough packets have been ack'd (typically cwnd
	// packets), the congestion window is incremented by one.
	sndCAAckCount int

	// outstanding is the number of outstanding packets, that is, packets
	// that have been sent but not yet acknowledged.
	outstanding int

	// sndWnd is the send window size.
	sndWnd seqnum.Size

	// sndUna is the next unacknowledged sequence number.
	sndUna seqnum.Value

	// sndNxt is the sequence number of the next segment to be sent.
	sndNxt seqnum.Value

	// sndNxtList is the sequence number of the next segment to be added to
	// the send list.
	sndNxtList seqnum.Value

	// rttMeasureSeqNum is the sequence number being used for the latest RTT
	// measurement.
	rttMeasureSeqNum seqnum.Value

	// rttMeasureTime is the time when the rttMeasureSeqNum was sent.
	rttMeasureTime time.Time

	// closed is not written in the portion of the file documented here.
	// NOTE(review): presumably records that the write side has been
	// closed; confirm against the code that sets it.
	closed bool

	// writeNext is the segment in writeList from which sendData resumes
	// transmission. It is rewound to the front of the list when the
	// retransmit timer expires.
	writeNext *segment

	// writeList is the list of segments queued for transmission or
	// retransmission. resendSegment treats its front as the first
	// unacknowledged segment.
	writeList segmentList

	// resendTimer is the retransmission timer. sendData arms it with rto
	// whenever there is unacknowledged data and it is not already enabled.
	resendTimer timer

	// resendWaker is the waker resendTimer is initialized with in
	// newSender.
	resendWaker sleep.Waker

	// rtt.srtt, rtt.rttvar, and rto are the "smoothed round-trip time",
	// "round-trip time variation" and "retransmit timeout", as defined in
	// section 2 of RFC 6298.
	rtt rtt
	rto time.Duration

	// maxPayloadSize is the maximum size of the payload of a given segment.
	// It is set in newSender and is only ever lowered afterwards by
	// updateMaxPayloadSize.
	maxPayloadSize int

	// gso is set if generic segmentation offload is enabled.
	gso bool

	// sndWndScale is the number of bits to shift left when reading the send
	// window size from a segment.
	sndWndScale uint8

	// maxSentAck is the maximum acknowledgement actually sent.
	maxSentAck seqnum.Value

	// state is the current state of congestion control for this endpoint.
	state ccState

	// cc is the congestion control algorithm in use for this sender.
	cc congestionControl
}
   169  
// rtt is a synchronization wrapper used to appease stateify. See the comment
// in sender, where it is used. updateRTO holds the embedded mutex while it
// reads and writes the fields below.
//
// +stateify savable
type rtt struct {
	sync.Mutex

	// srtt is the smoothed round-trip time estimate.
	srtt time.Duration
	// rttvar is the round-trip time variation estimate.
	rttvar time.Duration
	// srttInited is false until the first RTT sample has been recorded;
	// that first sample seeds srtt and rttvar directly.
	srttInited bool
}
   181  
// fastRecovery holds information related to fast recovery from a packet loss.
//
// +stateify savable
type fastRecovery struct {
	// active indicates whether the endpoint is in fast recovery. The
	// following fields are only meaningful when active is true.
	active bool

	// first and last represent the inclusive sequence number range being
	// recovered. last is also updated when the retransmit timer expires
	// (the "recover" variable of RFC 6582).
	first seqnum.Value
	last  seqnum.Value

	// maxCwnd is the maximum value the congestion window may be inflated to
	// due to duplicate acks. This exists to avoid attacks where the
	// receiver intentionally sends duplicate acks to artificially inflate
	// the sender's cwnd.
	maxCwnd int

	// highRxt is the highest sequence number which has been retransmitted
	// during the current loss recovery phase.
	// See: RFC 6675 Section 2 for details.
	highRxt seqnum.Value

	// rescueRxt is the highest sequence number which has been
	// optimistically retransmitted to prevent stalling of the ACK clock
	// when there is loss at the end of the window and no new data is
	// available for transmission.
	// See: RFC 6675 Section 2 for details.
	rescueRxt seqnum.Value
}
   213  
   214  func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint16, sndWndScale int) *sender {
   215  	// The sender MUST reduce the TCP data length to account for any IP or
   216  	// TCP options that it is including in the packets that it sends.
   217  	// See: https://tools.ietf.org/html/rfc6691#section-2
   218  	maxPayloadSize := int(mss) - ep.maxOptionSize()
   219  
   220  	s := &sender{
   221  		ep:               ep,
   222  		sndWnd:           sndWnd,
   223  		sndUna:           iss + 1,
   224  		sndNxt:           iss + 1,
   225  		sndNxtList:       iss + 1,
   226  		rto:              1 * time.Second,
   227  		rttMeasureSeqNum: iss + 1,
   228  		lastSendTime:     time.Now(),
   229  		maxPayloadSize:   maxPayloadSize,
   230  		maxSentAck:       irs + 1,
   231  		fr: fastRecovery{
   232  			// See: https://tools.ietf.org/html/rfc6582#section-3.2 Step 1.
   233  			last:      iss,
   234  			highRxt:   iss,
   235  			rescueRxt: iss,
   236  		},
   237  		gso: ep.gso != nil,
   238  	}
   239  
   240  	if s.gso {
   241  		s.ep.gso.MSS = uint16(maxPayloadSize)
   242  	}
   243  
   244  	s.cc = s.initCongestionControl(ep.cc)
   245  
   246  	// A negative sndWndScale means that no scaling is in use, otherwise we
   247  	// store the scaling value.
   248  	if sndWndScale > 0 {
   249  		s.sndWndScale = uint8(sndWndScale)
   250  	}
   251  
   252  	s.resendTimer.init(&s.resendWaker)
   253  
   254  	s.updateMaxPayloadSize(int(ep.route.MTU()), 0)
   255  
   256  	// Initialize SACK Scoreboard after updating max payload size as we use
   257  	// the maxPayloadSize as the smss when determining if a segment is lost
   258  	// etc.
   259  	s.ep.scoreboard = NewSACKScoreboard(uint16(s.maxPayloadSize), iss)
   260  
   261  	return s
   262  }
   263  
   264  // initCongestionControl initializes the specified congestion control module and
   265  // returns a handle to it. It also initializes the sndCwnd and sndSsThresh to
   266  // their initial values.
   267  func (s *sender) initCongestionControl(congestionControlName tcpip.CongestionControlOption) congestionControl {
   268  	s.sndCwnd = InitialCwnd
   269  	s.sndSsthresh = math.MaxUint16
   270  
   271  	switch congestionControlName {
   272  	case ccCubic:
   273  		return newCubicCC(s)
   274  	case ccReno:
   275  		fallthrough
   276  	default:
   277  		return newRenoCC(s)
   278  	}
   279  }
   280  
   281  // updateMaxPayloadSize updates the maximum payload size based on the given
   282  // MTU. If this is in response to "packet too big" control packets (indicated
   283  // by the count argument), it also reduces the number of outstanding packets and
   284  // attempts to retransmit the first packet above the MTU size.
   285  func (s *sender) updateMaxPayloadSize(mtu, count int) {
   286  	m := mtu - header.TCPMinimumSize
   287  
   288  	m -= s.ep.maxOptionSize()
   289  
   290  	// We don't adjust up for now.
   291  	if m >= s.maxPayloadSize {
   292  		return
   293  	}
   294  
   295  	// Make sure we can transmit at least one byte.
   296  	if m <= 0 {
   297  		m = 1
   298  	}
   299  
   300  	s.maxPayloadSize = m
   301  	if s.gso {
   302  		s.ep.gso.MSS = uint16(m)
   303  	}
   304  
   305  	if count == 0 {
   306  		// updateMaxPayloadSize is also called when the sender is created.
   307  		// and there is no data to send in such cases. Return immediately.
   308  		return
   309  	}
   310  
   311  	// Update the scoreboard's smss to reflect the new lowered
   312  	// maxPayloadSize.
   313  	s.ep.scoreboard.smss = uint16(m)
   314  
   315  	s.outstanding -= count
   316  	if s.outstanding < 0 {
   317  		s.outstanding = 0
   318  	}
   319  
   320  	// Rewind writeNext to the first segment exceeding the MTU. Do nothing
   321  	// if it is already before such a packet.
   322  	for seg := s.writeList.Front(); seg != nil; seg = seg.Next() {
   323  		if seg == s.writeNext {
   324  			// We got to writeNext before we could find a segment
   325  			// exceeding the MTU.
   326  			break
   327  		}
   328  
   329  		if seg.data.Size() > m {
   330  			// We found a segment exceeding the MTU. Rewind
   331  			// writeNext and try to retransmit it.
   332  			s.writeNext = seg
   333  			break
   334  		}
   335  	}
   336  
   337  	// Since we likely reduced the number of outstanding packets, we may be
   338  	// ready to send some more.
   339  	s.sendData()
   340  }
   341  
   342  // sendAck sends an ACK segment.
   343  func (s *sender) sendAck() {
   344  	s.sendSegmentFromView(buffer.VectorisedView{}, header.TCPFlagAck, s.sndNxt)
   345  }
   346  
   347  // updateRTO updates the retransmit timeout when a new roud-trip time is
   348  // available. This is done in accordance with section 2 of RFC 6298.
   349  func (s *sender) updateRTO(rtt time.Duration) {
   350  	s.rtt.Lock()
   351  	if !s.rtt.srttInited {
   352  		s.rtt.rttvar = rtt / 2
   353  		s.rtt.srtt = rtt
   354  		s.rtt.srttInited = true
   355  	} else {
   356  		diff := s.rtt.srtt - rtt
   357  		if diff < 0 {
   358  			diff = -diff
   359  		}
   360  		// Use RFC6298 standard algorithm to update rttvar and srtt when
   361  		// no timestamps are available.
   362  		if !s.ep.sendTSOk {
   363  			s.rtt.rttvar = (3*s.rtt.rttvar + diff) / 4
   364  			s.rtt.srtt = (7*s.rtt.srtt + rtt) / 8
   365  		} else {
   366  			// When we are taking RTT measurements of every ACK then
   367  			// we need to use a modified method as specified in
   368  			// https://tools.ietf.org/html/rfc7323#appendix-G
   369  			if s.outstanding == 0 {
   370  				s.rtt.Unlock()
   371  				return
   372  			}
   373  			// Netstack measures congestion window/inflight all in
   374  			// terms of packets and not bytes. This is similar to
   375  			// how linux also does cwnd and inflight. In practice
   376  			// this approximation works as expected.
   377  			expectedSamples := math.Ceil(float64(s.outstanding) / 2)
   378  
   379  			// alpha & beta values are the original values as recommended in
   380  			// https://tools.ietf.org/html/rfc6298#section-2.3.
   381  			const alpha = 0.125
   382  			const beta = 0.25
   383  
   384  			alphaPrime := alpha / expectedSamples
   385  			betaPrime := beta / expectedSamples
   386  			rttVar := (1-betaPrime)*s.rtt.rttvar.Seconds() + betaPrime*diff.Seconds()
   387  			srtt := (1-alphaPrime)*s.rtt.srtt.Seconds() + alphaPrime*rtt.Seconds()
   388  			s.rtt.rttvar = time.Duration(rttVar * float64(time.Second))
   389  			s.rtt.srtt = time.Duration(srtt * float64(time.Second))
   390  		}
   391  	}
   392  
   393  	s.rto = s.rtt.srtt + 4*s.rtt.rttvar
   394  	s.rtt.Unlock()
   395  	if s.rto < minRTO {
   396  		s.rto = minRTO
   397  	}
   398  }
   399  
   400  // resendSegment resends the first unacknowledged segment.
   401  func (s *sender) resendSegment() {
   402  	// Don't use any segments we already sent to measure RTT as they may
   403  	// have been affected by packets being lost.
   404  	s.rttMeasureSeqNum = s.sndNxt
   405  
   406  	// Resend the segment.
   407  	if seg := s.writeList.Front(); seg != nil {
   408  		if seg.data.Size() > s.maxPayloadSize {
   409  			s.splitSeg(seg, s.maxPayloadSize)
   410  		}
   411  
   412  		// See: RFC 6675 section 5 Step 4.3
   413  		//
   414  		// To prevent retransmission, set both the HighRXT and RescueRXT
   415  		// to the highest sequence number in the retransmitted segment.
   416  		s.fr.highRxt = seg.sequenceNumber.Add(seqnum.Size(seg.data.Size())) - 1
   417  		s.fr.rescueRxt = seg.sequenceNumber.Add(seqnum.Size(seg.data.Size())) - 1
   418  		s.sendSegment(seg)
   419  		s.ep.stack.Stats().TCP.FastRetransmit.Increment()
   420  		s.ep.stats.SendErrors.FastRetransmit.Increment()
   421  
   422  		// Run SetPipe() as per RFC 6675 section 5 Step 4.4
   423  		s.SetPipe()
   424  	}
   425  }
   426  
   427  // retransmitTimerExpired is called when the retransmit timer expires, and
   428  // unacknowledged segments are assumed lost, and thus need to be resent.
   429  // Returns true if the connection is still usable, or false if the connection
   430  // is deemed lost.
   431  func (s *sender) retransmitTimerExpired() bool {
   432  	// Check if the timer actually expired or if it's a spurious wake due
   433  	// to a previously orphaned runtime timer.
   434  	if !s.resendTimer.checkExpiration() {
   435  		return true
   436  	}
   437  
   438  	s.ep.stack.Stats().TCP.Timeouts.Increment()
   439  	s.ep.stats.SendErrors.Timeouts.Increment()
   440  
   441  	// Give up if we've waited more than a minute since the last resend.
   442  	if s.rto >= 60*time.Second {
   443  		return false
   444  	}
   445  
   446  	// Set new timeout. The timer will be restarted by the call to sendData
   447  	// below.
   448  	s.rto *= 2
   449  
   450  	// See: https://tools.ietf.org/html/rfc6582#section-3.2 Step 4.
   451  	//
   452  	// Retransmit timeouts:
   453  	//     After a retransmit timeout, record the highest sequence number
   454  	//     transmitted in the variable recover, and exit the fast recovery
   455  	//     procedure if applicable.
   456  	s.fr.last = s.sndNxt - 1
   457  
   458  	if s.fr.active {
   459  		// We were attempting fast recovery but were not successful.
   460  		// Leave the state. We don't need to update ssthresh because it
   461  		// has already been updated when entered fast-recovery.
   462  		s.leaveFastRecovery()
   463  	}
   464  
   465  	s.state = RTORecovery
   466  	s.cc.HandleRTOExpired()
   467  
   468  	// Mark the next segment to be sent as the first unacknowledged one and
   469  	// start sending again. Set the number of outstanding packets to 0 so
   470  	// that we'll be able to retransmit.
   471  	//
   472  	// We'll keep on transmitting (or retransmitting) as we get acks for
   473  	// the data we transmit.
   474  	s.outstanding = 0
   475  
   476  	// Expunge all SACK information as per https://tools.ietf.org/html/rfc6675#section-5.1
   477  	//
   478  	//  In order to avoid memory deadlocks, the TCP receiver is allowed to
   479  	//  discard data that has already been selectively acknowledged. As a
   480  	//  result, [RFC2018] suggests that a TCP sender SHOULD expunge the SACK
   481  	//  information gathered from a receiver upon a retransmission timeout
   482  	//  (RTO) "since the timeout might indicate that the data receiver has
   483  	//  reneged." Additionally, a TCP sender MUST "ignore prior SACK
   484  	//  information in determining which data to retransmit."
   485  	//
   486  	// NOTE: We take the stricter interpretation and just expunge all
   487  	// information as we lack more rigorous checks to validate if the SACK
   488  	// information is usable after an RTO.
   489  	s.ep.scoreboard.Reset()
   490  	s.writeNext = s.writeList.Front()
   491  	s.sendData()
   492  
   493  	return true
   494  }
   495  
   496  // pCount returns the number of packets in the segment. Due to GSO, a segment
   497  // can be composed of multiple packets.
   498  func (s *sender) pCount(seg *segment) int {
   499  	size := seg.data.Size()
   500  	if size == 0 {
   501  		return 1
   502  	}
   503  
   504  	return (size-1)/s.maxPayloadSize + 1
   505  }
   506  
   507  // splitSeg splits a given segment at the size specified and inserts the
   508  // remainder as a new segment after the current one in the write list.
   509  func (s *sender) splitSeg(seg *segment, size int) {
   510  	if seg.data.Size() <= size {
   511  		return
   512  	}
   513  	// Split this segment up.
   514  	nSeg := seg.clone()
   515  	nSeg.data.TrimFront(size)
   516  	nSeg.sequenceNumber.UpdateForward(seqnum.Size(size))
   517  	s.writeList.InsertAfter(seg, nSeg)
   518  	seg.data.CapLength(size)
   519  }
   520  
// NextSeg implements the RFC6675 NextSeg() operation. It returns segments that
// match rule 1, 3 and 4 of the NextSeg() operation defined in RFC6675. Rule 2
// is handled by the normal send logic.
//
// The write list is scanned from the front and the scan stops at the first
// segment that has not been assigned a sequence number yet. As soon as a
// segment matching rule 1 is found it is returned as nextSeg1, together with
// whatever rule 3 and rule 4 candidates were found before it. Otherwise
// nextSeg1 is nil and nextSeg3/nextSeg4 are the candidates found over the
// whole scan (either may be nil).
//
// Besides returning candidates, the scan has side effects: segments larger
// than the scoreboard's SMSS are split in the write list, and fr.rescueRxt is
// set to fr.last whenever a rule 4 candidate is considered.
func (s *sender) NextSeg() (nextSeg1, nextSeg3, nextSeg4 *segment) {
	// s3 and s4 are the candidates for rules (3) and (4) found so far.
	var s3 *segment
	var s4 *segment
	smss := s.ep.scoreboard.SMSS()
	// Step 1.
	for seg := s.writeList.Front(); seg != nil; seg = seg.Next() {
		// Only segments that were already assigned a sequence number
		// are considered.
		if !s.isAssignedSequenceNumber(seg) {
			break
		}
		segSeq := seg.sequenceNumber
		// Candidates cover at most SMSS octets; the remainder becomes
		// the next segment in the list and is visited in turn.
		if seg.data.Size() > int(smss) {
			s.splitSeg(seg, int(smss))
		}
		// See RFC 6675 Section 4
		//
		//     1. If there exists a smallest unSACKED sequence number
		//     'S2' that meets the following 3 criteria for determining
		//     loss, the sequence range of one segment of up to SMSS
		//     octets starting with S2 MUST be returned.
		//
		// Only the first octet of the segment is checked against the
		// scoreboard.
		if !s.ep.scoreboard.IsSACKED(header.SACKBlock{segSeq, segSeq.Add(1)}) {
			// NextSeg():
			//
			//    (1.a) S2 is greater than HighRxt
			//    (1.b) S2 is less than highest octet covered by
			//    any received SACK.
			if s.fr.highRxt.LessThan(segSeq) && segSeq.LessThan(s.ep.scoreboard.maxSACKED) {
				// NextSeg():
				//     (1.c) IsLost(S2) returns true.
				if s.ep.scoreboard.IsLost(segSeq) {
					return seg, s3, s4
				}
				// NextSeg():
				//
				// (3): If the conditions for rules (1) and (2)
				// fail, but there exists an unSACKed sequence
				// number S3 that meets the criteria for
				// detecting loss given in steps 1.a and 1.b
				// above (specifically excluding (1.c)) then one
				// segment of upto SMSS octets starting with S3
				// SHOULD be returned.
				//
				// Only the first such segment is remembered.
				if s3 == nil {
					s3 = seg
				}
			}
			// NextSeg():
			//
			//     (4) If the conditions for (1), (2) and (3) fail,
			//     but there exists outstanding unSACKED data, we
			//     provide the opportunity for a single "rescue"
			//     retransmission per entry into loss recovery. If
			//     HighACK is greater than RescueRxt, the one
			//     segment of upto SMSS octets that MUST include
			//     the highest outstanding unSACKed sequence number
			//     SHOULD be returned.
			//
			// sndUna - 1 plays the role of HighACK here. The
			// candidate with the highest sequence number wins.
			if s.fr.rescueRxt.LessThan(s.sndUna - 1) {
				if s4 != nil {
					if s4.sequenceNumber.LessThan(segSeq) {
						s4 = seg
					}
				} else {
					s4 = seg
				}
				s.fr.rescueRxt = s.fr.last
			}
		}
	}

	return nil, s3, s4
}
   593  
// maybeSendSegment tries to send the specified segment and either coalesces
// other segments into this one or splits the specified segment based on the
// lower of the specified limit value or the receivers window size specified by
// end.
//
// limit is the maximum number of payload bytes to send in this segment and
// end is the first sequence number past the receiver's window. The return
// value reports whether the segment was handed to sendSegment; false means it
// was held back (Nagle, cork) or does not fit in the window.
//
// A segment without data is treated as the FIN: it must be the last entry of
// the write list, and sending it moves the endpoint to LAST-ACK (from
// CLOSE-WAIT) or FIN-WAIT1 (from any other state).
func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (sent bool) {
	// We abuse the flags field to determine if we have already
	// assigned a sequence number to this segment.
	if !s.isAssignedSequenceNumber(seg) {
		// Merge segments if allowed.
		if seg.data.Size() != 0 {
			// available is how much payload this segment may carry:
			// the room left in the receiver's window, capped by
			// limit.
			available := int(seg.sequenceNumber.Size(end))
			if available > limit {
				available = limit
			}

			// nextTooBig indicates that the next segment was too
			// large to entirely fit in the current segment. It
			// would be possible to split the next segment and merge
			// the portion that fits, but unexpectedly splitting
			// segments can have user visible side-effects which can
			// break applications. For example, RFC 7766 section 8
			// says that the length and data of a DNS response
			// should be sent in the same TCP segment to avoid
			// triggering bugs in poorly written DNS
			// implementations.
			var nextTooBig bool
			// Keep absorbing the following data segments for as
			// long as they fit entirely. A following segment
			// without data (the FIN) is never merged.
			for seg.Next() != nil && seg.Next().data.Size() != 0 {
				if seg.data.Size()+seg.Next().data.Size() > available {
					nextTooBig = true
					break
				}
				seg.data.Append(seg.Next().data)

				// Consume the segment that we just merged in.
				s.writeList.Remove(seg.Next())
			}
			if !nextTooBig && seg.data.Size() < available {
				// Segment is not full.
				if s.outstanding > 0 && atomic.LoadUint32(&s.ep.delay) != 0 {
					// Nagle's algorithm. From Wikipedia:
					//   Nagle's algorithm works by
					//   combining a number of small
					//   outgoing messages and sending them
					//   all at once. Specifically, as long
					//   as there is a sent packet for which
					//   the sender has received no
					//   acknowledgment, the sender should
					//   keep buffering its output until it
					//   has a full packet's worth of
					//   output, thus allowing output to be
					//   sent all at once.
					return false
				}
				if atomic.LoadUint32(&s.ep.cork) != 0 {
					// Hold back the segment until full.
					return false
				}
			}
		}

		// Assign flags. We don't do it above so that we can merge
		// additional data if Nagle holds the segment.
		seg.sequenceNumber = s.sndNxt
		seg.flags = header.TCPFlagAck | header.TCPFlagPsh
	}

	// segEnd is the sequence number right after this segment; a FIN takes
	// up one sequence number.
	var segEnd seqnum.Value
	if seg.data.Size() == 0 {
		if s.writeList.Back() != seg {
			panic("FIN segments must be the final segment in the write list.")
		}
		seg.flags = header.TCPFlagAck | header.TCPFlagFin
		segEnd = seg.sequenceNumber.Add(1)
		// Transition to FIN-WAIT1 state since we're initiating an active close.
		//
		// NOTE(review): this branch runs every time the FIN segment
		// goes through here, which looks like it includes
		// retransmissions after an RTO. The state switch and the
		// CurrentEstablished decrement would then be repeated; confirm
		// that callers rule this out or that it is intended.
		s.ep.mu.Lock()
		switch s.ep.state {
		case StateCloseWait:
			// We've already received a FIN and are now sending our own. The
			// sender is now awaiting a final ACK for this FIN.
			s.ep.state = StateLastAck
		default:
			s.ep.state = StateFinWait1
		}
		s.ep.stack.Stats().TCP.CurrentEstablished.Decrement()
		s.ep.mu.Unlock()
	} else {
		// We're sending a non-FIN segment.
		if seg.flags&header.TCPFlagFin != 0 {
			panic("Netstack queues FIN segments without data.")
		}

		// The segment starts at or beyond the end of the receiver's
		// window: nothing can be sent.
		if !seg.sequenceNumber.LessThan(end) {
			return false
		}

		available := int(seg.sequenceNumber.Size(end))
		if available == 0 {
			return false
		}
		if available > limit {
			available = limit
		}

		// Send only what fits; the rest stays in the write list as a
		// new segment right after this one.
		if seg.data.Size() > available {
			s.splitSeg(seg, available)
		}

		segEnd = seg.sequenceNumber.Add(seqnum.Size(seg.data.Size()))
	}

	s.sendSegment(seg)

	// Update sndNxt if we actually sent new data (as opposed to
	// retransmitting some previously sent data).
	if s.sndNxt.LessThan(segEnd) {
		s.sndNxt = segEnd
	}

	return true
}
   714  
// handleSACKRecovery implements the loss recovery phase as described in RFC6675
// section 5, step C.
//
// After recomputing the pipe with SetPipe, it keeps sending one segment at a
// time while outstanding is below the congestion window. For each iteration
// the segment is chosen in NextSeg() rule order: a lost segment (rule 1),
// previously unsent data (rule 2), an unSACKed segment not yet deemed lost
// (rule 3) and finally a rescue retransmission (rule 4). limit and end are
// passed on to maybeSendSegment for new data. The return value reports
// whether anything was sent.
//
// Note that outstanding is incremented by one per segment sent here.
func (s *sender) handleSACKRecovery(limit int, end seqnum.Value) (dataSent bool) {
	s.SetPipe()
	for s.outstanding < s.sndCwnd {
		nextSeg, s3, s4 := s.NextSeg()
		if nextSeg == nil {
			// NextSeg():
			//
			// Step (2): "If no sequence number 'S2' per rule (1)
			// exists but there exists available unsent data and the
			// receiver's advertised window allows, the sequence
			// range of one segment of up to SMSS octets of
			// previously unsent data starting with sequence number
			// HighData+1 MUST be returned."
			for seg := s.writeNext; seg != nil; seg = seg.Next() {
				// Skip over segments that were already sent.
				if s.isAssignedSequenceNumber(seg) && seg.sequenceNumber.LessThan(s.sndNxt) {
					continue
				}
				// Step C.3 described below is handled by
				// maybeSendSegment which increments sndNxt when
				// a segment is transmitted.
				//
				// Step C.3 "If any of the data octets sent in
				// (C.1) are above HighData, HighData must be
				// updated to reflect the transmission of
				// previously unsent data."
				if sent := s.maybeSendSegment(seg, limit, end); !sent {
					break
				}
				dataSent = true
				s.outstanding++
				s.writeNext = seg.Next()
				nextSeg = seg
				// At most one new segment is sent per iteration
				// of the outer loop.
				break
			}
			// New data went out; start over with a fresh NextSeg()
			// evaluation.
			if nextSeg != nil {
				continue
			}
		}
		// Neither rule (1) nor rule (2) yielded a segment when nextSeg
		// is still nil here: fall back to rule (3) and then rule (4).
		rescueRtx := false
		if nextSeg == nil && s3 != nil {
			nextSeg = s3
		}
		if nextSeg == nil && s4 != nil {
			nextSeg = s4
			rescueRtx = true
		}
		// Nothing left that may be sent.
		if nextSeg == nil {
			break
		}
		segEnd := nextSeg.sequenceNumber.Add(nextSeg.logicalLen())
		if !rescueRtx && nextSeg.sequenceNumber.LessThan(s.sndNxt) {
			// RFC 6675, Step C.2
			//
			// "If any of the data octets sent in (C.1) are below
			// HighData, HighRxt MUST be set to the highest sequence
			// number of the retransmitted segment unless NextSeg ()
			// rule (4) was invoked for this retransmission."
			s.fr.highRxt = segEnd - 1
		}

		// RFC 6675, Step C.4.
		//
		// "The estimate of the amount of data outstanding in the network
		// must be updated by incrementing pipe by the number of octets
		// transmitted in (C.1)."
		s.outstanding++
		dataSent = true
		s.sendSegment(nextSeg)
	}
	return dataSent
}
   788  
   789  // sendData sends new data segments. It is called when data becomes available or
   790  // when the send window opens up.
   791  func (s *sender) sendData() {
   792  	limit := s.maxPayloadSize
   793  	if s.gso {
   794  		limit = int(s.ep.gso.MaxSize - header.TCPHeaderMaximumSize)
   795  	}
   796  	end := s.sndUna.Add(s.sndWnd)
   797  
   798  	// Reduce the congestion window to min(IW, cwnd) per RFC 5681, page 10.
   799  	// "A TCP SHOULD set cwnd to no more than RW before beginning
   800  	// transmission if the TCP has not sent data in the interval exceeding
   801  	// the retrasmission timeout."
   802  	if !s.fr.active && time.Now().Sub(s.lastSendTime) > s.rto {
   803  		if s.sndCwnd > InitialCwnd {
   804  			s.sndCwnd = InitialCwnd
   805  		}
   806  	}
   807  
   808  	var dataSent bool
   809  
   810  	// RFC 6675 recovery algorithm step C 1-5.
   811  	if s.fr.active && s.ep.sackPermitted {
   812  		dataSent = s.handleSACKRecovery(s.maxPayloadSize, end)
   813  	} else {
   814  		for seg := s.writeNext; seg != nil && s.outstanding < s.sndCwnd; seg = seg.Next() {
   815  			cwndLimit := (s.sndCwnd - s.outstanding) * s.maxPayloadSize
   816  			if cwndLimit < limit {
   817  				limit = cwndLimit
   818  			}
   819  			if s.isAssignedSequenceNumber(seg) && s.ep.sackPermitted && s.ep.scoreboard.IsSACKED(seg.sackBlock()) {
   820  				continue
   821  			}
   822  			if sent := s.maybeSendSegment(seg, limit, end); !sent {
   823  				break
   824  			}
   825  			dataSent = true
   826  			s.outstanding += s.pCount(seg)
   827  			s.writeNext = seg.Next()
   828  		}
   829  	}
   830  
   831  	if dataSent {
   832  		// We sent data, so we should stop the keepalive timer to ensure
   833  		// that no keepalives are sent while there is pending data.
   834  		s.ep.disableKeepaliveTimer()
   835  	}
   836  
   837  	// Enable the timer if we have pending data and it's not enabled yet.
   838  	if !s.resendTimer.enabled() && s.sndUna != s.sndNxt {
   839  		s.resendTimer.enable(s.rto)
   840  	}
   841  	// If we have no more pending data, start the keepalive timer.
   842  	if s.sndUna == s.sndNxt {
   843  		s.ep.resetKeepaliveTimer(false)
   844  	}
   845  }
   846  
   847  func (s *sender) enterFastRecovery() {
   848  	s.fr.active = true
   849  	// Save state to reflect we're now in fast recovery.
   850  	//
   851  	// See : https://tools.ietf.org/html/rfc5681#section-3.2 Step 3.
   852  	// We inflate the cwnd by 3 to account for the 3 packets which triggered
   853  	// the 3 duplicate ACKs and are now not in flight.
   854  	s.sndCwnd = s.sndSsthresh + 3
   855  	s.fr.first = s.sndUna
   856  	s.fr.last = s.sndNxt - 1
   857  	s.fr.maxCwnd = s.sndCwnd + s.outstanding
   858  	if s.ep.sackPermitted {
   859  		s.state = SACKRecovery
   860  		s.ep.stack.Stats().TCP.SACKRecovery.Increment()
   861  		return
   862  	}
   863  	s.state = FastRecovery
   864  	s.ep.stack.Stats().TCP.FastRecovery.Increment()
   865  }
   866  
   867  func (s *sender) leaveFastRecovery() {
   868  	s.fr.active = false
   869  	s.fr.maxCwnd = 0
   870  	s.dupAckCount = 0
   871  
   872  	// Deflate cwnd. It had been artificially inflated when new dups arrived.
   873  	s.sndCwnd = s.sndSsthresh
   874  
   875  	s.cc.PostRecovery()
   876  }
   877  
   878  func (s *sender) handleFastRecovery(seg *segment) (rtx bool) {
   879  	ack := seg.ackNumber
   880  	// We are in fast recovery mode. Ignore the ack if it's out of
   881  	// range.
   882  	if !ack.InRange(s.sndUna, s.sndNxt+1) {
   883  		return false
   884  	}
   885  
   886  	// Leave fast recovery if it acknowledges all the data covered by
   887  	// this fast recovery session.
   888  	if s.fr.last.LessThan(ack) {
   889  		s.leaveFastRecovery()
   890  		return false
   891  	}
   892  
   893  	if s.ep.sackPermitted {
   894  		// When SACK is enabled we let retransmission be governed by
   895  		// the SACK logic.
   896  		return false
   897  	}
   898  
   899  	// Don't count this as a duplicate if it is carrying data or
   900  	// updating the window.
   901  	if seg.logicalLen() != 0 || s.sndWnd != seg.window {
   902  		return false
   903  	}
   904  
   905  	// Inflate the congestion window if we're getting duplicate acks
   906  	// for the packet we retransmitted.
   907  	if ack == s.fr.first {
   908  		// We received a dup, inflate the congestion window by 1 packet
   909  		// if we're not at the max yet. Only inflate the window if
   910  		// regular FastRecovery is in use, RFC6675 does not require
   911  		// inflating cwnd on duplicate ACKs.
   912  		if s.sndCwnd < s.fr.maxCwnd {
   913  			s.sndCwnd++
   914  		}
   915  		return false
   916  	}
   917  
   918  	// A partial ack was received. Retransmit this packet and
   919  	// remember it so that we don't retransmit it again. We don't
   920  	// inflate the window because we're putting the same packet back
   921  	// onto the wire.
   922  	//
   923  	// N.B. The retransmit timer will be reset by the caller.
   924  	s.fr.first = ack
   925  	s.dupAckCount = 0
   926  	return true
   927  }
   928  
   929  // isAssignedSequenceNumber relies on the fact that we only set flags once a
   930  // sequencenumber is assigned and that is only done right before we send the
   931  // segment. As a result any segment that has a non-zero flag has a valid
   932  // sequence number assigned to it.
   933  func (s *sender) isAssignedSequenceNumber(seg *segment) bool {
   934  	return seg.flags != 0
   935  }
   936  
   937  // SetPipe implements the SetPipe() function described in RFC6675. Netstack
   938  // maintains the congestion window in number of packets and not bytes, so
   939  // SetPipe() here measures number of outstanding packets rather than actual
   940  // outstanding bytes in the network.
   941  func (s *sender) SetPipe() {
   942  	// If SACK isn't permitted or it is permitted but recovery is not active
   943  	// then ignore pipe calculations.
   944  	if !s.ep.sackPermitted || !s.fr.active {
   945  		return
   946  	}
   947  	pipe := 0
   948  	smss := seqnum.Size(s.ep.scoreboard.SMSS())
   949  	for s1 := s.writeList.Front(); s1 != nil && s1.data.Size() != 0 && s.isAssignedSequenceNumber(s1); s1 = s1.Next() {
   950  		// With GSO each segment can be much larger than SMSS. So check the segment
   951  		// in SMSS sized ranges.
   952  		segEnd := s1.sequenceNumber.Add(seqnum.Size(s1.data.Size()))
   953  		for startSeq := s1.sequenceNumber; startSeq.LessThan(segEnd); startSeq = startSeq.Add(smss) {
   954  			endSeq := startSeq.Add(smss)
   955  			if segEnd.LessThan(endSeq) {
   956  				endSeq = segEnd
   957  			}
   958  			sb := header.SACKBlock{startSeq, endSeq}
   959  			// SetPipe():
   960  			//
   961  			// After initializing pipe to zero, the following steps are
   962  			// taken for each octet 'S1' in the sequence space between
   963  			// HighACK and HighData that has not been SACKed:
   964  			if !s1.sequenceNumber.LessThan(s.sndNxt) {
   965  				break
   966  			}
   967  			if s.ep.scoreboard.IsSACKED(sb) {
   968  				continue
   969  			}
   970  
   971  			// SetPipe():
   972  			//
   973  			//    (a) If IsLost(S1) returns false, Pipe is incremened by 1.
   974  			//
   975  			// NOTE: here we mark the whole segment as lost. We do not try
   976  			// and test every byte in our write buffer as we maintain our
   977  			// pipe in terms of oustanding packets and not bytes.
   978  			if !s.ep.scoreboard.IsRangeLost(sb) {
   979  				pipe++
   980  			}
   981  			// SetPipe():
   982  			//    (b) If S1 <= HighRxt, Pipe is incremented by 1.
   983  			if s1.sequenceNumber.LessThanEq(s.fr.highRxt) {
   984  				pipe++
   985  			}
   986  		}
   987  	}
   988  	s.outstanding = pipe
   989  }
   990  
   991  // checkDuplicateAck is called when an ack is received. It manages the state
   992  // related to duplicate acks and determines if a retransmit is needed according
   993  // to the rules in RFC 6582 (NewReno).
   994  func (s *sender) checkDuplicateAck(seg *segment) (rtx bool) {
   995  	ack := seg.ackNumber
   996  	if s.fr.active {
   997  		return s.handleFastRecovery(seg)
   998  	}
   999  
  1000  	// We're not in fast recovery yet. A segment is considered a duplicate
  1001  	// only if it doesn't carry any data and doesn't update the send window,
  1002  	// because if it does, it wasn't sent in response to an out-of-order
  1003  	// segment. If SACK is enabled then we have an additional check to see
  1004  	// if the segment carries new SACK information. If it does then it is
  1005  	// considered a duplicate ACK as per RFC6675.
  1006  	if ack != s.sndUna || seg.logicalLen() != 0 || s.sndWnd != seg.window || ack == s.sndNxt {
  1007  		if !s.ep.sackPermitted || !seg.hasNewSACKInfo {
  1008  			s.dupAckCount = 0
  1009  			return false
  1010  		}
  1011  	}
  1012  
  1013  	s.dupAckCount++
  1014  
  1015  	// Do not enter fast recovery until we reach nDupAckThreshold or the
  1016  	// first unacknowledged byte is considered lost as per SACK scoreboard.
  1017  	if s.dupAckCount < nDupAckThreshold || (s.ep.sackPermitted && !s.ep.scoreboard.IsLost(s.sndUna)) {
  1018  		// RFC 6675 Step 3.
  1019  		s.fr.highRxt = s.sndUna - 1
  1020  		// Do run SetPipe() to calculate the outstanding segments.
  1021  		s.SetPipe()
  1022  		s.state = Disorder
  1023  		return false
  1024  	}
  1025  
  1026  	// See: https://tools.ietf.org/html/rfc6582#section-3.2 Step 2
  1027  	//
  1028  	// We only do the check here, the incrementing of last to the highest
  1029  	// sequence number transmitted till now is done when enterFastRecovery
  1030  	// is invoked.
  1031  	if !s.fr.last.LessThan(seg.ackNumber) {
  1032  		s.dupAckCount = 0
  1033  		return false
  1034  	}
  1035  	s.cc.HandleNDupAcks()
  1036  	s.enterFastRecovery()
  1037  	s.dupAckCount = 0
  1038  	return true
  1039  }
  1040  
// handleRcvdSegment is called when a segment is received; it is responsible for
// updating the send-related state.
//
// In order, it: takes an RTT sample where possible, records the peer's
// timestamp, feeds new SACK blocks into the scoreboard, runs duplicate ACK
// detection, stores the advertised window, removes newly acknowledged data
// from the write list (updating outstanding, the congestion window and the
// retransmit timer), performs a retransmission if one was requested, and
// finally tries to send more data.
func (s *sender) handleRcvdSegment(seg *segment) {
	// Check if we can extract an RTT measurement from this ack. This path is
	// only taken when the segment carries no timestamp option; the
	// timestamp-based sample is taken further below.
	if !seg.parsedOptions.TS && s.rttMeasureSeqNum.LessThan(seg.ackNumber) {
		s.updateRTO(time.Now().Sub(s.rttMeasureTime))
		// Move the measurement point to the next sequence number to be sent.
		s.rttMeasureSeqNum = s.sndNxt
	}

	// Update Timestamp if required. See RFC7323, section-4.3.
	if s.ep.sendTSOk && seg.parsedOptions.TS {
		s.ep.updateRecentTimestamp(seg.parsedOptions.TSVal, s.maxSentAck, seg.sequenceNumber)
	}

	// Insert SACKBlock information into our scoreboard.
	if s.ep.sackPermitted {
		for _, sb := range seg.parsedOptions.SACKBlocks {
			// Only insert the SACK block if the following holds
			// true:
			//  * SACK block acks data after the ack number in the
			//    current segment.
			//  * SACK block represents a sequence
			//    between sndUna and sndNxt (i.e. data that is
			//    currently unacked and in-flight).
			//  * SACK block that has not been SACKed already.
			//
			// NOTE: This check specifically excludes DSACK blocks
			// which have start/end before sndUna and are used to
			// indicate spurious retransmissions.
			if seg.ackNumber.LessThan(sb.Start) && s.sndUna.LessThan(sb.Start) && sb.End.LessThanEq(s.sndNxt) && !s.ep.scoreboard.IsSACKED(sb) {
				s.ep.scoreboard.Insert(sb)
				// Remembered on the segment so that duplicate ACK
				// detection and the final send decision can see it.
				seg.hasNewSACKInfo = true
			}
		}
		// Recompute outstanding from the updated scoreboard. SetPipe does
		// nothing unless a recovery episode is active.
		s.SetPipe()
	}

	// Count the duplicates and do the fast retransmit if needed. The
	// retransmission itself is deferred until acknowledged data has been
	// removed from the write list below.
	rtx := s.checkDuplicateAck(seg)

	// Stash away the current window size.
	s.sndWnd = seg.window

	// Ignore ack if it doesn't acknowledge any new data.
	ack := seg.ackNumber
	if (ack - 1).InRange(s.sndUna, s.sndNxt) {
		s.dupAckCount = 0

		// See : https://tools.ietf.org/html/rfc1323#section-3.3.
		// Specifically we should only update the RTO using TSEcr if the
		// following condition holds:
		//
		//    A TSecr value received in a segment is used to update the
		//    averaged RTT measurement only if the segment acknowledges
		//    some new data, i.e., only if it advances the left edge of
		//    the send window.
		if s.ep.sendTSOk && seg.parsedOptions.TSEcr != 0 {
			// TSVal/Ecr values sent by Netstack are at a millisecond
			// granularity.
			elapsed := time.Duration(s.ep.timestamp()-seg.parsedOptions.TSEcr) * time.Millisecond
			s.updateRTO(elapsed)
		}

		// When an ack is received we must rearm the timer.
		// RFC 6298 5.2
		s.resendTimer.enable(s.rto)

		// Remove all acknowledged data from the write list.
		acked := s.sndUna.Size(ack)
		s.sndUna = ack

		ackLeft := acked
		// Snapshot taken so the number of newly acknowledged packets can be
		// reported to congestion control after the loop.
		originalOutstanding := s.outstanding
		for ackLeft > 0 {
			// We use logicalLen here because we can have FIN
			// segments (which are always at the end of list) that
			// have no data, but do consume a sequence number.
			//
			// NOTE: this seg shadows the function parameter for the
			// remainder of the loop body; it is the head of the write
			// list, not the received segment.
			seg := s.writeList.Front()
			datalen := seg.logicalLen()

			// Partially acknowledged segment: trim the acknowledged bytes
			// off its front, advance its sequence number, adjust
			// outstanding by the change in packet count and stop.
			if datalen > ackLeft {
				prevCount := s.pCount(seg)
				seg.data.TrimFront(int(ackLeft))
				seg.sequenceNumber.UpdateForward(ackLeft)
				s.outstanding -= prevCount - s.pCount(seg)
				break
			}

			// Fully acknowledged segment: make sure writeNext does not keep
			// pointing at it, then unlink it from the write list.
			if s.writeNext == seg {
				s.writeNext = seg.Next()
			}
			s.writeList.Remove(seg)

			// If SACK is enabled then only reduce outstanding if
			// the segment was not previously SACKED as these have
			// already been accounted for in SetPipe().
			if !s.ep.sackPermitted || !s.ep.scoreboard.IsSACKED(seg.sackBlock()) {
				s.outstanding -= s.pCount(seg)
			}
			seg.decRef()
			ackLeft -= datalen
		}

		// Update the send buffer usage and notify potential waiters.
		s.ep.updateSndBufferUsage(int(acked))

		// Clear SACK information for all acked data.
		s.ep.scoreboard.Delete(s.sndUna)

		// If we are not in fast recovery then update the congestion
		// window based on the number of acknowledged packets.
		if !s.fr.active {
			s.cc.Update(originalOutstanding - s.outstanding)
			// Once everything up to the end of the last recovery episode
			// has been acknowledged, return to the Open state.
			if s.fr.last.LessThan(s.sndUna) {
				s.state = Open
			}
		}

		// It is possible for s.outstanding to drop below zero if we get
		// a retransmit timeout, reset outstanding to zero but later
		// get an ack that covers previously sent data.
		if s.outstanding < 0 {
			s.outstanding = 0
		}

		// Recompute outstanding now that sndUna and the scoreboard have
		// changed (again a no-op outside of SACK recovery).
		s.SetPipe()

		// If all outstanding data was acknowledged then disable the timer.
		// RFC 6298 Rule 5.3
		if s.sndUna == s.sndNxt {
			s.outstanding = 0
			s.resendTimer.disable()
		}
	}
	// Now that we've popped all acknowledged data from the retransmit
	// queue, retransmit if needed.
	if rtx {
		s.resendSegment()
	}

	// Send more data now that some of the pending data has been ack'd, or
	// that the window opened up, or the congestion window was inflated due
	// to a duplicate ack during fast recovery. This will also re-enable
	// the retransmit timer if needed.
	//
	// With SACK enabled and no recovery in progress, sending is skipped for
	// duplicate ACKs that carried no new SACK information.
	if !s.ep.sackPermitted || s.fr.active || s.dupAckCount == 0 || seg.hasNewSACKInfo {
		s.sendData()
	}
}
  1189  
  1190  // sendSegment sends the specified segment.
  1191  func (s *sender) sendSegment(seg *segment) *tcpip.Error {
  1192  	if !seg.xmitTime.IsZero() {
  1193  		s.ep.stack.Stats().TCP.Retransmits.Increment()
  1194  		s.ep.stats.SendErrors.Retransmits.Increment()
  1195  		if s.sndCwnd < s.sndSsthresh {
  1196  			s.ep.stack.Stats().TCP.SlowStartRetransmits.Increment()
  1197  		}
  1198  	}
  1199  	seg.xmitTime = time.Now()
  1200  	return s.sendSegmentFromView(seg.data, seg.flags, seg.sequenceNumber)
  1201  }
  1202  
  1203  // sendSegmentFromView sends a new segment containing the given payload, flags
  1204  // and sequence number.
  1205  func (s *sender) sendSegmentFromView(data buffer.VectorisedView, flags byte, seq seqnum.Value) *tcpip.Error {
  1206  	s.lastSendTime = time.Now()
  1207  	if seq == s.rttMeasureSeqNum {
  1208  		s.rttMeasureTime = s.lastSendTime
  1209  	}
  1210  
  1211  	rcvNxt, rcvWnd := s.ep.rcv.getSendParams()
  1212  
  1213  	// Remember the max sent ack.
  1214  	s.maxSentAck = rcvNxt
  1215  
  1216  	// Every time a packet containing data is sent (including a
  1217  	// retransmission), if SACK is enabled then use the conservative timer
  1218  	// described in RFC6675 Section 4.0, otherwise follow the standard time
  1219  	// described in RFC6298 Section 5.2.
  1220  	if data.Size() != 0 {
  1221  		if s.ep.sackPermitted {
  1222  			s.resendTimer.enable(s.rto)
  1223  		} else {
  1224  			if !s.resendTimer.enabled() {
  1225  				s.resendTimer.enable(s.rto)
  1226  			}
  1227  		}
  1228  	}
  1229  
  1230  	return s.ep.sendRaw(data, flags, seq, rcvNxt, rcvWnd)
  1231  }