gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/tcpip/transport/tcp/accept.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tcp
    16  
    17  import (
    18  	"container/list"
    19  	"crypto/sha1"
    20  	"encoding/binary"
    21  	"fmt"
    22  	"hash"
    23  	"io"
    24  	"time"
    25  
    26  	"gvisor.dev/gvisor/pkg/sync"
    27  	"gvisor.dev/gvisor/pkg/tcpip"
    28  	"gvisor.dev/gvisor/pkg/tcpip/header"
    29  	"gvisor.dev/gvisor/pkg/tcpip/ports"
    30  	"gvisor.dev/gvisor/pkg/tcpip/seqnum"
    31  	"gvisor.dev/gvisor/pkg/tcpip/stack"
    32  	"gvisor.dev/gvisor/pkg/waiter"
    33  )
    34  
const (
	// tsLen is the length, in bits, of the timestamp in the SYN cookie.
	tsLen = 8

	// tsMask is a mask for timestamp values (i.e., the low tsLen bits).
	tsMask = (1 << tsLen) - 1

	// tsOffset is the offset, in bits, of the timestamp in the SYN cookie.
	// The bits below tsOffset carry the masked hash-plus-data value.
	tsOffset = 24

	// hashMask is the mask for hash values (i.e., the low tsOffset bits).
	hashMask = (1 << tsOffset) - 1

	// maxTSDiff is the maximum allowed difference between a received cookie
	// timestamp and the current timestamp, measured in units of the
	// timestamp returned by timeStamp. If the difference is greater than
	// maxTSDiff, the cookie is expired.
	maxTSDiff = 2
)
    53  
    54  var (
    55  	// mssTable is a slice containing the possible MSS values that we
    56  	// encode in the SYN cookie with two bits.
    57  	mssTable = []uint16{536, 1300, 1440, 1460}
    58  )
    59  
    60  func encodeMSS(mss uint16) uint32 {
    61  	for i := len(mssTable) - 1; i > 0; i-- {
    62  		if mss >= mssTable[i] {
    63  			return uint32(i)
    64  		}
    65  	}
    66  	return 0
    67  }
    68  
// listenContext is used by a listening endpoint to store state used while
// listening for connections. This struct is allocated by the listen goroutine
// and must not be accessed or have its methods called concurrently as they
// may mutate the stored objects.
type listenContext struct {
	// stack is the network stack used for route lookups, the clock and the
	// secure random number generator.
	stack    *stack.Stack
	// protocol is the TCP protocol instance handed to newly created
	// endpoints.
	protocol *protocol

	// rcvWnd is the receive window that is sent by this listening context
	// in the initial SYN-ACK.
	rcvWnd seqnum.Size

	// nonce holds two blocks of random bytes that are filled in once when
	// the context is created. One block is mixed into each of the two
	// hashes computed when generating or validating a SYN cookie.
	nonce [2][sha1.BlockSize]byte

	// listenEP is a reference to the listening endpoint associated with
	// this context. Can be nil if the context is created by the forwarder.
	listenEP *Endpoint

	// hasherMu protects hasher.
	hasherMu sync.Mutex
	// hasher is the hash function used to generate a SYN cookie. It is
	// reset and reused for every hash computation.
	hasher hash.Hash

	// v6Only is true if listenEP is a dual stack socket and has the
	// IPV6_V6ONLY option set.
	v6Only bool

	// netProto indicates the network protocol(IPv4/v6) for the listening
	// endpoint. When zero, the protocol of the incoming packet is used for
	// new endpoints instead.
	netProto tcpip.NetworkProtocolNumber
}
   103  
   104  // timeStamp returns an 8-bit timestamp with a granularity of 64 seconds.
   105  func timeStamp(clock tcpip.Clock) uint32 {
   106  	return uint32(clock.NowMonotonic().Sub(tcpip.MonotonicTime{}).Seconds()) >> 6 & tsMask
   107  }
   108  
   109  // newListenContext creates a new listen context.
   110  func newListenContext(stk *stack.Stack, protocol *protocol, listenEP *Endpoint, rcvWnd seqnum.Size, v6Only bool, netProto tcpip.NetworkProtocolNumber) *listenContext {
   111  	l := &listenContext{
   112  		stack:    stk,
   113  		protocol: protocol,
   114  		rcvWnd:   rcvWnd,
   115  		hasher:   sha1.New(),
   116  		v6Only:   v6Only,
   117  		netProto: netProto,
   118  		listenEP: listenEP,
   119  	}
   120  
   121  	for i := range l.nonce {
   122  		if _, err := io.ReadFull(stk.SecureRNG().Reader, l.nonce[i][:]); err != nil {
   123  			panic(err)
   124  		}
   125  	}
   126  
   127  	return l
   128  }
   129  
   130  // cookieHash calculates the cookieHash for the given id, timestamp and nonce
   131  // index. The hash is used to create and validate cookies.
   132  func (l *listenContext) cookieHash(id stack.TransportEndpointID, ts uint32, nonceIndex int) uint32 {
   133  
   134  	// Initialize block with fixed-size data: local ports and v.
   135  	var payload [8]byte
   136  	binary.BigEndian.PutUint16(payload[0:], id.LocalPort)
   137  	binary.BigEndian.PutUint16(payload[2:], id.RemotePort)
   138  	binary.BigEndian.PutUint32(payload[4:], ts)
   139  
   140  	// Feed everything to the hasher.
   141  	l.hasherMu.Lock()
   142  	l.hasher.Reset()
   143  
   144  	// Per hash.Hash.Writer:
   145  	//
   146  	// It never returns an error.
   147  	l.hasher.Write(payload[:])
   148  	l.hasher.Write(l.nonce[nonceIndex][:])
   149  	l.hasher.Write(id.LocalAddress.AsSlice())
   150  	l.hasher.Write(id.RemoteAddress.AsSlice())
   151  
   152  	// Finalize the calculation of the hash and return the first 4 bytes.
   153  	h := l.hasher.Sum(nil)
   154  	l.hasherMu.Unlock()
   155  
   156  	return binary.BigEndian.Uint32(h[:])
   157  }
   158  
   159  // createCookie creates a SYN cookie for the given id and incoming sequence
   160  // number.
   161  func (l *listenContext) createCookie(id stack.TransportEndpointID, seq seqnum.Value, data uint32) seqnum.Value {
   162  	ts := timeStamp(l.stack.Clock())
   163  	v := l.cookieHash(id, 0, 0) + uint32(seq) + (ts << tsOffset)
   164  	v += (l.cookieHash(id, ts, 1) + data) & hashMask
   165  	return seqnum.Value(v)
   166  }
   167  
   168  // isCookieValid checks if the supplied cookie is valid for the given id and
   169  // sequence number. If it is, it also returns the data originally encoded in the
   170  // cookie when createCookie was called.
   171  func (l *listenContext) isCookieValid(id stack.TransportEndpointID, cookie seqnum.Value, seq seqnum.Value) (uint32, bool) {
   172  	ts := timeStamp(l.stack.Clock())
   173  	v := uint32(cookie) - l.cookieHash(id, 0, 0) - uint32(seq)
   174  	cookieTS := v >> tsOffset
   175  	if ((ts - cookieTS) & tsMask) > maxTSDiff {
   176  		return 0, false
   177  	}
   178  
   179  	return (v - l.cookieHash(id, cookieTS, 1)) & hashMask, true
   180  }
   181  
// createConnectingEndpoint creates a new endpoint in a connecting state, with
// the connection parameters given by the arguments. The newly created endpoint
// will be locked.
//
// If the route lookup fails, the error is returned before any endpoint is
// created, so there is nothing to unlock or close in that case.
// +checklocksacquire:n.mu
func (l *listenContext) createConnectingEndpoint(s *segment, rcvdSynOpts header.TCPSynOptions, queue *waiter.Queue) (n *Endpoint, _ tcpip.Error) {
	// Create a new endpoint.
	//
	// Fall back to the network protocol of the incoming packet when the
	// context does not specify one.
	netProto := l.netProto
	if netProto == 0 {
		netProto = s.pkt.NetworkProtocolNumber
	}

	// Look up the route using the packet's destination and source
	// addresses before allocating anything.
	route, err := l.stack.FindRoute(s.pkt.NICID, s.pkt.Network().DestinationAddress(), s.pkt.Network().SourceAddress(), s.pkt.NetworkProtocolNumber, false /* multicastLoop */)
	if err != nil {
		return nil, err // +checklocksignore
	}

	n = newEndpoint(l.stack, l.protocol, netProto, queue)
	// The lock taken here is intentionally still held on return.
	n.mu.Lock()
	n.ops.SetV6Only(l.v6Only)
	n.TransportEndpointInfo.ID = s.id
	n.boundNICID = s.pkt.NICID
	n.route = route
	n.effectiveNetProtos = []tcpip.NetworkProtocolNumber{s.pkt.NetworkProtocolNumber}
	n.ops.SetReceiveBufferSize(int64(l.rcvWnd), false /* notify */)
	// amss is derived from n.userMSS and n.route, so it must be computed
	// after the route has been assigned.
	n.amss = calculateAdvertisedMSS(n.userMSS, n.route)
	n.setEndpointState(StateConnecting)

	n.maybeEnableTimestamp(rcvdSynOpts)
	n.maybeEnableSACKPermitted(rcvdSynOpts)

	n.initGSO()

	// Bootstrap the auto tuning algorithm. Starting at zero will result in
	// a large step function on the first window adjustment causing the
	// window to grow to a really large value.
	initWnd := n.initialReceiveWindow()
	n.rcvQueueMu.Lock()
	n.RcvAutoParams.PrevCopiedBytes = initWnd
	n.rcvQueueMu.Unlock()

	return n, nil
}
   224  
// startHandshake creates a new endpoint in connecting state and then sends
// the SYN-ACK for the TCP 3-way handshake. It returns the state of the
// handshake in progress, which includes the new endpoint in the SYN-RCVD
// state.
//
// On success, a handshake h is returned. On every failure path the newly
// created endpoint is unlocked and closed before the error is returned.
//
// NOTE: h.ep.mu is not held and must be acquired if any state needs to be
// modified.
//
// Precondition: if l.listenEP != nil, l.listenEP.mu must be locked.
func (l *listenContext) startHandshake(s *segment, opts header.TCPSynOptions, queue *waiter.Queue, owner tcpip.PacketOwner) (h *handshake, _ tcpip.Error) {
	// Create new endpoint.
	//
	// irs is the peer's initial sequence number taken from the segment;
	// isn is our own initial sequence number.
	irs := s.sequenceNumber
	isn := generateSecureISN(s.id, l.stack.Clock(), l.protocol.seqnumSecret)
	// On success ep is returned with ep.mu held.
	ep, err := l.createConnectingEndpoint(s, opts, queue)
	if err != nil {
		return nil, err // +checklocksignore
	}

	ep.owner = owner

	// listenEP is nil when listenContext is used by tcp.Forwarder.
	deferAccept := time.Duration(0)
	if l.listenEP != nil {
		// The listening endpoint may have left the listen state since
		// the segment was queued; abort in that case.
		if l.listenEP.EndpointState() != StateListen {

			// Ensure we release any registrations done by the newly
			// created endpoint.
			ep.mu.Unlock()
			ep.Close()

			return nil, &tcpip.ErrConnectionAborted{} // +checklocksignore
		}

		// Propagate any inheritable options from the listening endpoint
		// to the newly created endpoint.
		l.listenEP.propagateInheritableOptionsLocked(ep) // +checklocksforce

		// The reservation uses the bound port flags and device copied
		// above, so it must come after the propagation.
		if !ep.reserveTupleLocked() {
			ep.mu.Unlock()
			ep.Close()

			return nil, &tcpip.ErrConnectionAborted{} // +checklocksignore
		}

		deferAccept = l.listenEP.deferAccept
	}

	// Register new endpoint so that packets are routed to it.
	if err := ep.stack.RegisterTransportEndpoint(
		ep.effectiveNetProtos,
		ProtocolNumber,
		ep.TransportEndpointInfo.ID,
		ep,
		ep.boundPortFlags,
		ep.boundBindToDevice,
	); err != nil {
		ep.mu.Unlock()
		ep.Close()

		ep.drainClosingSegmentQueue()

		return nil, err // +checklocksignore
	}

	ep.isRegistered = true

	// Initialize and start the handshake.
	h = ep.newPassiveHandshake(isn, irs, opts, deferAccept)
	h.listenEP = l.listenEP
	h.start()
	// Release the lock acquired by createConnectingEndpoint; callers get
	// the handshake with h.ep.mu not held.
	h.ep.mu.Unlock()
	return h, nil
}
   300  
// performHandshake performs a TCP 3-way handshake. On success, the new
// established endpoint is returned.
//
// The call blocks until a writable event is delivered on queue. If the
// endpoint is not in a connected state at that point it is closed and the
// endpoint's last error is returned, or ErrConnectionAborted if there is
// none.
//
// Precondition: if l.listenEP != nil, l.listenEP.mu must be locked.
func (l *listenContext) performHandshake(s *segment, opts header.TCPSynOptions, queue *waiter.Queue, owner tcpip.PacketOwner) (*Endpoint, tcpip.Error) {
	// Register for events before starting the handshake so that a
	// notification cannot be missed.
	waitEntry, notifyCh := waiter.NewChannelEntry(waiter.WritableEvents)
	queue.EventRegister(&waitEntry)
	defer queue.EventUnregister(&waitEntry)

	h, err := l.startHandshake(s, opts, queue, owner)
	if err != nil {
		return nil, err
	}

	// performHandshake is used by the Forwarder which will block till the
	// handshake either succeeds or fails. We do this by registering for
	// events above and block on the notification channel.
	<-notifyCh

	ep := h.ep
	ep.mu.Lock()
	if !ep.EndpointState().connected() {
		// The handshake failed: account for it, drop the handshake
		// state and tear the endpoint down.
		ep.stack.Stats().TCP.FailedConnectionAttempts.Increment()
		ep.stats.FailedConnectionAttempts.Increment()
		ep.h = nil
		ep.mu.Unlock()
		ep.Close()
		ep.notifyAborted()
		ep.drainClosingSegmentQueue()
		err := ep.LastError()
		if err == nil {
			// If err was nil then return the best error we can to indicate
			// a connection failure.
			err = &tcpip.ErrConnectionAborted{}
		}
		return nil, err
	}

	ep.isConnectNotified = true

	// Transfer any state from the completed handshake to the endpoint.
	//
	// Update the receive window scaling. We can't do it before the
	// handshake because it's possible that the peer doesn't support window
	// scaling.
	ep.rcv.RcvWndScale = ep.h.effectiveRcvWndScale()

	// Clean up handshake state stored in the endpoint so that it can be
	// GCed.
	ep.h = nil
	ep.mu.Unlock()
	return ep, nil
}
   354  
   355  // propagateInheritableOptionsLocked propagates any options set on the listening
   356  // endpoint to the newly created endpoint.
   357  //
   358  // +checklocks:e.mu
   359  // +checklocks:n.mu
   360  func (e *Endpoint) propagateInheritableOptionsLocked(n *Endpoint) {
   361  	n.userTimeout = e.userTimeout
   362  	n.portFlags = e.portFlags
   363  	n.boundBindToDevice = e.boundBindToDevice
   364  	n.boundPortFlags = e.boundPortFlags
   365  	n.userMSS = e.userMSS
   366  }
   367  
   368  // reserveTupleLocked reserves an accepted endpoint's tuple.
   369  //
   370  // Precondition: e.propagateInheritableOptionsLocked has been called.
   371  //
   372  // +checklocks:e.mu
   373  func (e *Endpoint) reserveTupleLocked() bool {
   374  	dest := tcpip.FullAddress{
   375  		Addr: e.TransportEndpointInfo.ID.RemoteAddress,
   376  		Port: e.TransportEndpointInfo.ID.RemotePort,
   377  	}
   378  	portRes := ports.Reservation{
   379  		Networks:     e.effectiveNetProtos,
   380  		Transport:    ProtocolNumber,
   381  		Addr:         e.TransportEndpointInfo.ID.LocalAddress,
   382  		Port:         e.TransportEndpointInfo.ID.LocalPort,
   383  		Flags:        e.boundPortFlags,
   384  		BindToDevice: e.boundBindToDevice,
   385  		Dest:         dest,
   386  	}
   387  	if !e.stack.ReserveTuple(portRes) {
   388  		e.stack.Stats().TCP.FailedPortReservations.Increment()
   389  		return false
   390  	}
   391  
   392  	e.isPortReserved = true
   393  	e.boundDest = dest
   394  	return true
   395  }
   396  
   397  // notifyAborted wakes up any waiters on registered, but not accepted
   398  // endpoints.
   399  //
   400  // This is strictly not required normally as a socket that was never accepted
   401  // can't really have any registered waiters except when stack.Wait() is called
   402  // which waits for all registered endpoints to stop and expects an EventHUp.
   403  func (e *Endpoint) notifyAborted() {
   404  	e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.ReadableEvents | waiter.WritableEvents)
   405  }
   406  
   407  func (e *Endpoint) acceptQueueIsFull() bool {
   408  	e.acceptMu.Lock()
   409  	full := e.acceptQueue.isFull()
   410  	e.acceptMu.Unlock()
   411  	return full
   412  }
   413  
// acceptQueue holds the endpoints of a listening endpoint that are ready to
// be accepted, together with those whose handshake is still in progress.
//
// +stateify savable
type acceptQueue struct {
	// endpoints is the list of endpoints that are waiting to be accepted.
	//
	// NB: this could be an endpointList, but ilist only permits endpoints to
	// belong to one list at a time, and endpoints are already stored in the
	// dispatcher's list.
	endpoints list.List `state:".([]*Endpoint)"`

	// pendingEndpoints is a set of all endpoints for which a handshake is
	// in progress.
	pendingEndpoints map[*Endpoint]struct{}

	// capacity is the maximum number of endpoints that can be in endpoints.
	capacity int
}
   428  
   429  func (a *acceptQueue) isFull() bool {
   430  	return a.endpoints.Len() >= a.capacity
   431  }
   432  
// handleListenSegment is called when a listening endpoint receives a segment
// and needs to handle it.
//
// The segment is handled according to its flags:
//   - If the endpoint's receive side is closed, or the segment is a SYN-ACK,
//     a reset is sent.
//   - RST segments are dropped.
//   - SYN segments either start a regular handshake or, when SYN cookies are
//     in use, are answered with a stateless SYN-ACK carrying a cookie.
//   - ACK segments are redirected to an already connected endpoint matching
//     the segment's id if there is one; otherwise the cookie is validated
//     and, if valid, a connected endpoint is created and queued for accept.
//   - Anything else is dropped.
//
// +checklocks:e.mu
func (e *Endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Error {
	e.rcvQueueMu.Lock()
	rcvClosed := e.RcvClosed
	e.rcvQueueMu.Unlock()
	if rcvClosed || s.flags.Contains(header.TCPFlagSyn|header.TCPFlagAck) {
		// If the endpoint is shutdown, reply with reset.
		//
		// RFC 793 section 3.4 page 35 (figure 12) outlines that a RST
		// must be sent in response to a SYN-ACK while in the listen
		// state to prevent completing a handshake from an old SYN.
		return replyWithReset(e.stack, s, e.sendTOS, e.ipv4TTL, e.ipv6HopLimit)
	}

	switch {
	case s.flags.Contains(header.TCPFlagRst):
		e.stack.Stats().DroppedPackets.Increment()
		return nil

	case s.flags.Contains(header.TCPFlagSyn):
		// Drop the SYN outright when there is no room to accept another
		// connection.
		if e.acceptQueueIsFull() {
			e.stack.Stats().TCP.ListenOverflowSynDrop.Increment()
			e.stats.ReceiveErrors.ListenOverflowSynDrop.Increment()
			e.stack.Stats().DroppedPackets.Increment()
			return nil
		}

		opts := parseSynSegmentOptions(s)

		// Decide whether to answer with a SYN cookie. When cookies are
		// not needed, the closure starts a regular handshake itself and
		// records the new endpoint as pending, all under acceptMu.
		useSynCookies, err := func() (bool, tcpip.Error) {
			var alwaysUseSynCookies tcpip.TCPAlwaysUseSynCookies
			if err := e.stack.TransportProtocolOption(header.TCPProtocolNumber, &alwaysUseSynCookies); err != nil {
				panic(fmt.Sprintf("TransportProtocolOption(%d, %T) = %s", header.TCPProtocolNumber, alwaysUseSynCookies, err))
			}
			if alwaysUseSynCookies {
				return true, nil
			}
			e.acceptMu.Lock()
			defer e.acceptMu.Unlock()

			// The capacity of the accepted queue would always be one greater than the
			// listen backlog. But, the SYNRCVD connections count is always checked
			// against the listen backlog value for Linux parity reason.
			// https://github.com/torvalds/linux/blob/7acac4b3196/include/net/inet_connection_sock.h#L280
			if len(e.acceptQueue.pendingEndpoints) == e.acceptQueue.capacity-1 {
				return true, nil
			}

			h, err := ctx.startHandshake(s, opts, &waiter.Queue{}, e.owner)
			if err != nil {
				e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
				e.stats.FailedConnectionAttempts.Increment()
				return false, err
			}
			e.acceptQueue.pendingEndpoints[h.ep] = struct{}{}

			return false, nil
		}()
		if err != nil {
			return err
		}
		if !useSynCookies {
			return nil
		}

		net := s.pkt.Network()
		route, err := e.stack.FindRoute(s.pkt.NICID, net.DestinationAddress(), net.SourceAddress(), s.pkt.NetworkProtocolNumber, false /* multicastLoop */)
		if err != nil {
			return err
		}
		// The route is only needed to send the SYN-ACK below.
		defer route.Release()

		// Send SYN-ACK without window scaling because we currently
		// don't encode this information in the cookie.
		//
		// Enable Timestamp option if the original syn did have
		// the timestamp option specified.
		//
		// Use the user supplied MSS on the listening socket for
		// new connections, if available.
		synOpts := header.TCPSynOptions{
			WS:    -1,
			TS:    opts.TS,
			TSEcr: opts.TSVal,
			MSS:   calculateAdvertisedMSS(e.userMSS, route),
		}
		if opts.TS {
			offset := e.protocol.tsOffset(net.DestinationAddress(), net.SourceAddress())
			now := e.stack.Clock().NowMonotonic()
			synOpts.TSVal = offset.TSVal(now)
		}
		// The cookie doubles as our initial sequence number and carries
		// the encoded MSS of the peer.
		cookie := ctx.createCookie(s.id, s.sequenceNumber, encodeMSS(opts.MSS))
		fields := tcpFields{
			id:     s.id,
			ttl:    calculateTTL(route, e.ipv4TTL, e.ipv6HopLimit),
			tos:    e.sendTOS,
			flags:  header.TCPFlagSyn | header.TCPFlagAck,
			seq:    cookie,
			ack:    s.sequenceNumber + 1,
			rcvWnd: ctx.rcvWnd,
		}
		if err := e.sendSynTCP(route, fields, synOpts); err != nil {
			return err
		}
		e.stack.Stats().TCP.ListenOverflowSynCookieSent.Increment()
		return nil

	case s.flags.Contains(header.TCPFlagAck):
		// If this ACK completes a cookie handshake, it acknowledges our
		// cookie + 1 and carries the peer's initial sequence number + 1.
		iss := s.ackNumber - 1
		irs := s.sequenceNumber - 1

		// As an edge case when SYN-COOKIES are in use and we receive a
		// segment that has data and is valid we should check if it
		// already matches a created endpoint and redirect the segment
		// rather than try and create a new endpoint. This can happen
		// where the final ACK for the handshake and other data packets
		// arrive at the same time and are queued to the listening
		// endpoint before the listening endpoint has had time to
		// process the first ACK and create the endpoint that matches
		// the incoming packet's full 5 tuple.
		netProtos := []tcpip.NetworkProtocolNumber{s.pkt.NetworkProtocolNumber}
		// If the local address is an IPv4 Address then also look for IPv6
		// dual stack endpoints.
		if s.id.LocalAddress.To4() != (tcpip.Address{}) {
			netProtos = []tcpip.NetworkProtocolNumber{header.IPv4ProtocolNumber, header.IPv6ProtocolNumber}
		}
		for _, netProto := range netProtos {
			if newEP := e.stack.FindTransportEndpoint(netProto, ProtocolNumber, s.id, s.pkt.NICID); newEP != nil && newEP != e {
				tcpEP := newEP.(*Endpoint)
				if !tcpEP.EndpointState().connected() {
					continue
				}
				if !tcpEP.enqueueSegment(s) {
					// Just silently drop the segment as we failed
					// to queue, we don't want to generate a RST
					// further below or try and create a new
					// endpoint etc.
					return nil
				}
				tcpEP.notifyProcessor()
				return nil
			}
		}

		// Since SYN cookies are in use this is potentially an ACK to a
		// SYN-ACK we sent but don't have a half open connection state
		// as cookies are being used to protect against a potential SYN
		// flood. In such cases validate the cookie and if valid create
		// a fully connected endpoint and deliver to the accept queue.
		//
		// If not, silently drop the ACK to avoid leaking information
		// when under a potential syn flood attack.
		//
		// Validate the cookie. The decoded data is an index into
		// mssTable, so it is also range-checked here.
		data, ok := ctx.isCookieValid(s.id, iss, irs)
		if !ok || int(data) >= len(mssTable) {
			e.stack.Stats().TCP.ListenOverflowInvalidSynCookieRcvd.Increment()
			e.stack.Stats().DroppedPackets.Increment()

			// When not using SYN cookies, as per RFC 793, section 3.9, page 64:
			// Any acknowledgment is bad if it arrives on a connection still in
			// the LISTEN state.  An acceptable reset segment should be formed
			// for any arriving ACK-bearing segment.  The RST should be
			// formatted as follows:
			//
			//  <SEQ=SEG.ACK><CTL=RST>
			//
			// Send a reset as this is an ACK for which there is no
			// half open connections and we are not using cookies
			// yet.
			//
			// The only time we should reach here when a connection
			// was opened and closed really quickly and a delayed
			// ACK was received from the sender.
			return replyWithReset(e.stack, s, e.sendTOS, e.ipv4TTL, e.ipv6HopLimit)
		}

		// Keep hold of acceptMu until the new endpoint is in the accept queue (or
		// if there is an error), to guarantee that we will keep our spot in the
		// queue even if another handshake from the syn queue completes.
		e.acceptMu.Lock()
		if e.acceptQueue.isFull() {
			// Silently drop the ack as the application can't accept
			// the connection at this point. The ack will be
			// retransmitted by the sender anyway and we can
			// complete the connection at the time of retransmit if
			// the backlog has space.
			e.acceptMu.Unlock()
			e.stack.Stats().TCP.ListenOverflowAckDrop.Increment()
			e.stats.ReceiveErrors.ListenOverflowAckDrop.Increment()
			e.stack.Stats().DroppedPackets.Increment()
			return nil
		}

		e.stack.Stats().TCP.ListenOverflowSynCookieRcvd.Increment()
		// Create newly accepted endpoint and deliver it.
		rcvdSynOptions := header.TCPSynOptions{
			MSS: mssTable[data],
			// Disable Window scaling as original SYN is
			// lost.
			WS: -1,
		}

		// When syn cookies are in use we enable timestamp only
		// if the ack specifies the timestamp option assuming
		// that the other end did in fact negotiate the
		// timestamp option in the original SYN.
		if s.parsedOptions.TS {
			rcvdSynOptions.TS = true
			rcvdSynOptions.TSVal = s.parsedOptions.TSVal
			rcvdSynOptions.TSEcr = s.parsedOptions.TSEcr
		}

		// On success n is returned with n.mu held.
		n, err := ctx.createConnectingEndpoint(s, rcvdSynOptions, &waiter.Queue{})
		if err != nil {
			e.acceptMu.Unlock()
			return err
		}

		// Propagate any inheritable options from the listening endpoint
		// to the newly created endpoint.
		e.propagateInheritableOptionsLocked(n)

		if !n.reserveTupleLocked() {
			n.mu.Unlock()
			e.acceptMu.Unlock()
			n.Close()

			e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
			e.stats.FailedConnectionAttempts.Increment()
			return nil
		}

		// Register new endpoint so that packets are routed to it.
		if err := n.stack.RegisterTransportEndpoint(
			n.effectiveNetProtos,
			ProtocolNumber,
			n.TransportEndpointInfo.ID,
			n,
			n.boundPortFlags,
			n.boundBindToDevice,
		); err != nil {
			n.mu.Unlock()
			e.acceptMu.Unlock()
			n.Close()

			e.stack.Stats().TCP.FailedConnectionAttempts.Increment()
			e.stats.FailedConnectionAttempts.Increment()
			return err
		}

		n.isRegistered = true
		net := s.pkt.Network()
		n.TSOffset = n.protocol.tsOffset(net.DestinationAddress(), net.SourceAddress())

		// Switch state to connected.
		//
		// No handshake was tracked for this connection, so build the
		// handshake state from the ACK and go straight to established.
		n.isConnectNotified = true
		h := handshake{
			ep:                  n,
			iss:                 iss,
			ackNum:              irs + 1,
			rcvWnd:              seqnum.Size(n.initialReceiveWindow()),
			sndWnd:              s.window,
			rcvWndScale:         e.rcvWndScaleForHandshake(),
			sndWndScale:         rcvdSynOptions.WS,
			mss:                 rcvdSynOptions.MSS,
			sampleRTTWithTSOnly: true,
		}
		h.ep.AssertLockHeld(n)
		h.transitionToStateEstablishedLocked(s)
		n.mu.Unlock()

		// Requeue the segment if the ACK completing the handshake has more info
		// to be processed by the newly established endpoint.
		if (s.flags.Contains(header.TCPFlagFin) || s.payloadSize() > 0) && n.enqueueSegment(s) {
			n.notifyProcessor()
		}

		e.stack.Stats().TCP.PassiveConnectionOpenings.Increment()

		// Deliver the endpoint to the accept queue.
		e.acceptQueue.endpoints.PushBack(n)
		e.acceptMu.Unlock()

		e.waiterQueue.Notify(waiter.ReadableEvents)
		return nil

	default:
		e.stack.Stats().DroppedPackets.Increment()
		return nil
	}
}